Kenny edit 29/2/2024 - looking into priming more, trying to add semantic similarity. Kenny edit 13/9/2024 - fixing error in random effects
This function is used to load all files in subdirectories of directory.name with a certain prefix (D for dyads, P for pseudo-dyads, S for singles) into a single data frame.
load.data.by.prefix <- function(prefix, directory.name) {
  # Find the participant folders, e.g. Data/Dyads/D300, Data/Dyads/D301, ...
  # (prefix followed by a digit).
  dirs <- list.files(directory.name,
                     pattern = paste("^", prefix, "[0123456789]", sep = ""),
                     full.names = TRUE)
  data <- list()
  # Read every csv file in every matched directory. Appending by
  # length(data)+1 fixes a bug in the original code, which indexed by
  # directory and therefore kept only the LAST csv file from any
  # directory containing more than one. Iterating directly over dirs
  # also avoids 1:length(dirs) misbehaving when no directory matches.
  for (dir.i in dirs) {
    # Anchor the pattern so only files actually ending in .csv match.
    csv.files <- list.files(dir.i, pattern = "\\.csv$", full.names = TRUE)
    for (csv.file in csv.files) {
      print(csv.file)  # progress trace, echoed in the rendered document
      data[[length(data) + 1]] <- read.csv(csv.file, header = TRUE,
                                           stringsAsFactors = FALSE)
    }
  }
  # Bind the list of per-file data frames into one data frame.
  data <- do.call("rbind", data)
  # Make the grouping columns factors for later looping/modelling.
  data$ParticipantID <- factor(data$ParticipantID)
  data$Chain <- factor(data$Chain)
  data$Stage <- factor(data$Stage)
  data
}
# Read every dyad's csv (folders D300-D331, per the trace below) into one data frame.
dyad.data <- load.data.by.prefix("D", "Data/Dyads")
## [1] "Data/Dyads/D300/D300_g01.csv"
## [1] "Data/Dyads/D301/D301_g01.csv"
## [1] "Data/Dyads/D302/D302_g01.csv"
## [1] "Data/Dyads/D303/D303_g01.csv"
## [1] "Data/Dyads/D304/D304_g01.csv"
## [1] "Data/Dyads/D305/D305_g01.csv"
## [1] "Data/Dyads/D306/D306_g01.csv"
## [1] "Data/Dyads/D307/D307_g01.csv"
## [1] "Data/Dyads/D308/D308_g01.csv"
## [1] "Data/Dyads/D309/D309_g01.csv"
## [1] "Data/Dyads/D310/D310_g01.csv"
## [1] "Data/Dyads/D311/D311_g01.csv"
## [1] "Data/Dyads/D312/D312_g01.csv"
## [1] "Data/Dyads/D313/D313_g01.csv"
## [1] "Data/Dyads/D314/D314_g01.csv"
## [1] "Data/Dyads/D315/D315_g01.csv"
## [1] "Data/Dyads/D316/D316_g01.csv"
## [1] "Data/Dyads/D317/D317_g01.csv"
## [1] "Data/Dyads/D318/D318_g01.csv"
## [1] "Data/Dyads/D319/D319_g01.csv"
## [1] "Data/Dyads/D320/D320_g01.csv"
## [1] "Data/Dyads/D321/D321_g01.csv"
## [1] "Data/Dyads/D322/D322_g01.csv"
## [1] "Data/Dyads/D323/D323_g01.csv"
## [1] "Data/Dyads/D324/D324_g01.csv"
## [1] "Data/Dyads/D325/D325_g01.csv"
## [1] "Data/Dyads/D326/D326_g01.csv"
## [1] "Data/Dyads/D327/D327_g01.csv"
## [1] "Data/Dyads/D328/D328_g01.csv"
## [1] "Data/Dyads/D329/D329_g01.csv"
## [1] "Data/Dyads/D330/D330_g01.csv"
## [1] "Data/Dyads/D331/D331_g01.csv"
# Read every pseudo-dyad's csv (folders P200-P229, with some gaps) into one data frame.
pseudodyad.data <- load.data.by.prefix("P", "Data/PseudoDyads")
## [1] "Data/PseudoDyads/P200/P200_g01.csv"
## [1] "Data/PseudoDyads/P201/P201_g01.csv"
## [1] "Data/PseudoDyads/P202/P202_g01.csv"
## [1] "Data/PseudoDyads/P203/P203_g01.csv"
## [1] "Data/PseudoDyads/P204/P204_g01.csv"
## [1] "Data/PseudoDyads/P205/P205_g01.csv"
## [1] "Data/PseudoDyads/P206/P206_g01.csv"
## [1] "Data/PseudoDyads/P207/P207_g01.csv"
## [1] "Data/PseudoDyads/P208/P208_g01.csv"
## [1] "Data/PseudoDyads/P209/P209_g01.csv"
## [1] "Data/PseudoDyads/P210/P210_g01.csv"
## [1] "Data/PseudoDyads/P211/P211_g01.csv"
## [1] "Data/PseudoDyads/P212/P212_g01.csv"
## [1] "Data/PseudoDyads/P213/P213_g01.csv"
## [1] "Data/PseudoDyads/P214/P214_g01.csv"
## [1] "Data/PseudoDyads/P215/P215_g01.csv"
## [1] "Data/PseudoDyads/P216/P216_g01.csv"
## [1] "Data/PseudoDyads/P217/P217_g01.csv"
## [1] "Data/PseudoDyads/P218/P218_g01.csv"
## [1] "Data/PseudoDyads/P219/P219_g01.csv"
## [1] "Data/PseudoDyads/P220/P220_g01.csv"
## [1] "Data/PseudoDyads/P223/P223_g01.csv"
## [1] "Data/PseudoDyads/P224/P224_g01.csv"
## [1] "Data/PseudoDyads/P225/P225_g01.csv"
## [1] "Data/PseudoDyads/P226/P226_g01.csv"
## [1] "Data/PseudoDyads/P228/P228_g01.csv"
## [1] "Data/PseudoDyads/P229/P229_g01.csv"
# Read every single participant's csv (folders S100-S133, with some gaps) into one data frame.
single.data <- load.data.by.prefix("S", "Data/Singles")
## [1] "Data/Singles/S100/S100_g01.csv"
## [1] "Data/Singles/S101/S101_g01.csv"
## [1] "Data/Singles/S102/S102_g01.csv"
## [1] "Data/Singles/S103/S103_g01.csv"
## [1] "Data/Singles/S104/S104_g01.csv"
## [1] "Data/Singles/S105/S105_g01.csv"
## [1] "Data/Singles/S106/S106_g01.csv"
## [1] "Data/Singles/S107/S107_g01.csv"
## [1] "Data/Singles/S108/S108_g01.csv"
## [1] "Data/Singles/S109/S109_g01.csv"
## [1] "Data/Singles/S110/S110_g01.csv"
## [1] "Data/Singles/S111/S111_g01.csv"
## [1] "Data/Singles/S112/S112_g01.csv"
## [1] "Data/Singles/S113/S113_g01.csv"
## [1] "Data/Singles/S114/S114_g01.csv"
## [1] "Data/Singles/S115/S115_g01.csv"
## [1] "Data/Singles/S116/S116_g01.csv"
## [1] "Data/Singles/S117/S117_g01.csv"
## [1] "Data/Singles/S118/S118_g01.csv"
## [1] "Data/Singles/S119/S119_g01.csv"
## [1] "Data/Singles/S120/S120_g01.csv"
## [1] "Data/Singles/S121/S121_g01.csv"
## [1] "Data/Singles/S122/S122_g01.csv"
## [1] "Data/Singles/S123/S123_g01.csv"
## [1] "Data/Singles/S124/S124_g01.csv"
## [1] "Data/Singles/S125/S125_g01.csv"
## [1] "Data/Singles/S126/S126_g01.csv"
## [1] "Data/Singles/S127/S127_g01.csv"
## [1] "Data/Singles/S128/S128_g01.csv"
## [1] "Data/Singles/S129/S129_g01.csv"
## [1] "Data/Singles/S130/S130_g01.csv"
## [1] "Data/Singles/S132/S132_g01.csv"
## [1] "Data/Singles/S133/S133_g01.csv"
Combine into a single data frame.
# Tag each dataset with its experimental condition before pooling, so each
# row of the combined data records which group it came from.
dyad.data$Condition <- "Dyad"
pseudodyad.data$Condition <- "PseudoDyad"
single.data$Condition <- "Single"
# Pool all three conditions into a single data frame for analysis.
all.data <- rbind(dyad.data, pseudodyad.data,single.data)
Remove leading/trailing whitespace using trim.
#code for removing leading/trailing whitespaces
#from http://stackoverflow.com/questions/2261079/how-to-trim-leading-and-trailing-whitespace-in-r
# Base R (>= 3.2.0) provides trimws() for exactly this; prefer it to the
# hand-rolled regex. NOTE: trimws() trims [ \t\r\n], whereas "\\s" also
# matched \f and \v — characters that should not occur in typed labels.
trim <- function (x) trimws(x)
# Strip stray whitespace from the typed labels before parsing them below.
all.data$CheckedLabel <- trim(all.data$CheckedLabel)
Identify grammaticality and particle used, and classify nouns.
# Lexicon of the artificial language: 16 animal nouns, 16 vehicle nouns,
# 8 candidate plural particles, and 5 verbs.
animals <- c("cow","dog","elephant","fox","giraffe","hamster","hedgehog","hippo","kangaroo","panda","pig","rabbit","sheep","squirrel","tiger","zebra")
vehicles <- c("ambulance","bike","boat","bus","car","digger","submarine","helicopter","plane","rocket","scooter","tank","tractor","train","truck","van")
particles <- c("bup", "dak", "jeb", "kem", "pag", "tid", "wib", "yav")
verbs <- c("glim", "norg", "frab", "gund", "shen")
This function identifies if there is a particle present and identifies that particle, on the assumption that the 3rd label in a 3-word sequence is the particle (this is run on checked data).
identify.particle <- function(checkedLabel) {
  # Return the particle in a checked label: the 3rd word of a 3-word
  # label. Any other word count (including 2-word singulars) yields "0",
  # meaning "no particle"; an NA label propagates NA.
  if (is.na(checkedLabel)) {
    return(NA)
  }
  words <- strsplit(checkedLabel, " ")[[1]]
  if (length(words) == 3) words[3] else "0"
}
This function identifies the noun category using the Meaning - this is e.g. “straight_cow_1”, “straight_tiger_2”.
identify.category <- function(meaning) {
  # Classify the noun category from a Meaning string such as
  # "straight_cow_1": "A" for animals, "V" for vehicles, "None" if the
  # second underscore-separated field is in neither lexicon list; NA
  # meanings propagate NA. Relies on the global animals/vehicles vectors.
  if (is.na(meaning)) {
    return(NA)
  }
  noun <- strsplit(meaning, "_")[[1]][2]
  if (is.element(noun, animals)) {
    "A"
  } else if (is.element(noun, vehicles)) {
    "V"
  } else {
    "None"
  }
}
This function identifies the noun - this will be the only element in a 1-element meaning (which occurs during noun training), otherwise the second element of meaning.
identify.noun <- function(meaning) {
  # Extract the noun from a Meaning string: the whole string for a
  # one-field meaning (noun training), otherwise the second
  # underscore-separated field. NA propagates NA; an empty split
  # falls back to "None".
  if (is.na(meaning)) {
    return(NA)
  }
  fields <- strsplit(meaning, "_")[[1]]
  if (length(fields) > 1) {
    fields[2]
  } else if (length(fields) == 1) {
    fields
  } else {
    "None"
  }
}
This function identifies the number.
identify.number <- function(meaning) {
  # Extract the number code (third underscore-separated field, e.g.
  # "straight_cow_2" -> "2"). One-field meanings (noun training) carry
  # no number, so return NA; an NA meaning propagates NA; an empty
  # split falls back to "None".
  if (is.na(meaning)) {
    return(NA)
  }
  fields <- strsplit(meaning, "_")[[1]]
  if (length(fields) > 1) {
    fields[3]
  } else if (length(fields) == 1) {
    NA
  } else {
    "None"
  }
}
Classifies descriptions as grammatical or not - a grammatical description consists of a verb, a noun and then a particle for plurals.
legal.description <- function(stage,number,checkedLabel) {
  # Classify a description as grammatical (TRUE/FALSE) or not applicable
  # (NA). A grammatical singular is verb + known noun; a grammatical
  # plural is verb + known noun + particle. Relies on the global
  # verbs/animals/vehicles/particles vectors.
  if (!(stage %in% c("interactD","interactM","recall1"))) {
    # Ignore anything other than production trials or matcher trials
    # (where the label comes from the partner).
    return(NA)
  }
  words <- strsplit(checkedLabel, ' ')[[1]]
  if (is.na(number)) {
    return(NA)  # number unknown: cannot judge grammaticality
  }
  if (number == 1) {
    # Singular: legal if it is 2 words, first a verb, second an animal
    # or vehicle noun.
    (length(words) == 2) && (words[1] %in% verbs) &&
      ((words[2] %in% animals) || (words[2] %in% vehicles))
  } else if (number == 2) {
    # Plural: legal if it is 3 words, first a verb, second an animal or
    # vehicle noun, third a particle. (Fixes "is32 words" comment typo.)
    (length(words) == 3) && (words[1] %in% verbs) &&
      ((words[2] %in% animals) || (words[2] %in% vehicles)) &&
      (words[3] %in% particles)
  } else {
    # Unexpected number code: the original left `legal` unassigned here
    # (a runtime error); return NA instead.
    NA
  }
}
Identify particle in training trials and in labels produced during testing. Since these are in different columns we need to run twice and then update the Particle column for training trials with the particle in TrainingParticle.
# Identify the particle in training labels and in labels produced at test.
all.data$TrainingParticle <- factor(sapply(all.data$TrainingLabel,identify.particle))
all.data$Particle <- factor(sapply(all.data$CheckedLabel,identify.particle))
# Training trials carry their particle in TrainingLabel, not CheckedLabel,
# so overwrite Particle for those rows with the training particle.
all.data <- within(all.data, Particle[Stage=="training"] <- TrainingParticle[Stage=="training"])
Identify noun category for training items and test items. Slightly redundant to calculate both TrainingCategory and Category, but since participants could mislabel a noun on test, TrainingCategory is useful for identifying experimental conditions (see below).
# Both columns are computed identically from Meaning; TrainingCategory is
# kept separately because it is used below to identify the experimental
# condition even if a participant mislabels a noun at test.
all.data$TrainingCategory <- factor(sapply(all.data$Meaning,identify.category))
all.data$Category <- factor(sapply(all.data$Meaning,identify.category))
Identify nouns and number.
# Extract the target noun and the number code (1 = singular, 2 = plural).
all.data$Noun <- factor(sapply(all.data$Meaning,identify.noun))
all.data$Number <- factor(sapply(all.data$Meaning,identify.number))
Classify 1-category vs 2-category conditions
add.condition identifies the condition based on whether
there are one or two categories in training.
add.condition <- function(data) {
  # For each chain, count the distinct noun categories seen in its
  # training trials and record that as NCategories on every row of the
  # chain (1- vs 2-category experimental condition).
  per.chain <- lapply(levels(data$Chain), function(ch) {
    chain.rows <- subset(data, Chain == ch)
    chain.training <- subset(data, Chain == ch & Stage=="training")
    n.cats <- length(unique(chain.training$TrainingCategory))
    if (n.cats == 1) {
      chain.rows$NCategories <- 1
    } else if (n.cats == 2) {
      chain.rows$NCategories <- 2
    }
    chain.rows
  })
  if (length(per.chain) == 0) {
    return(data.frame())
  }
  do.call(rbind, per.chain)
}
# Label every row with its chain's 1- vs 2-category condition.
all.data <- add.condition(all.data)
Identify, count and remove illegal descriptions.
# Mark each label grammatical / ungrammatical; NA outside the three
# stages legal.description applies to.
all.data$Grammatical <- mapply(function(s,n,l) legal.description(s,n,l),
all.data$Stage,all.data$Number,all.data$CheckedLabel)
Illegal descriptions make up about 1% of the data, and are roughly proportionate across singulars and plurals.
# Count grammatical / ungrammatical / not-applicable trials.
plyr::ddply(all.data,~Grammatical,plyr::summarise,Current_N=length(ParticipantID))
## Grammatical Current_N
## 1 FALSE 572
## 2 TRUE 43588
## 3 NA 29440
# Cross-tabulate grammaticality by number (1 = singular, 2 = plural).
plyr::ddply(all.data,~Grammatical+Number,plyr::summarise,Current_N=length(ParticipantID))
## Grammatical Number Current_N
## 1 FALSE 1 170
## 2 FALSE 2 402
## 3 TRUE 1 14550
## 4 TRUE 2 29038
## 5 NA 1 5888
## 6 NA 2 11776
## 7 NA <NA> 11776
Want to leave Grammatical=NA trials in (since those are training trials etc).
all.data.including.ungrammatical <- all.data #need this for convenience later when adding trial numbers for priming analysis
# Drop ungrammatical (FALSE) trials but keep Grammatical=NA rows
# (training trials etc).
all.data <- subset(all.data,Grammatical | is.na(Grammatical))
# Confirm only TRUE/NA rows remain.
plyr::ddply(all.data,~Grammatical,plyr::summarise,Current_N=length(ParticipantID))
## Grammatical Current_N
## 1 TRUE 43588
## 2 NA 29440
Remove rows containing dummy data or pseudo-partner.
# Remove dummy-participant rows and rows from the pseudo-partner
# (identified by the localhost IP).
all.data.for.proportion.analysis <- subset(all.data,ParticipantID!="dummyP")
all.data.for.proportion.analysis <- subset(all.data.for.proportion.analysis,IP!="127.0.0.1")
Count of participants per condition now dummy participants are removed.
# Participant counts per condition after removing dummy participants.
plyr::ddply(all.data.for.proportion.analysis,~Condition+NCategories,plyr::summarise,Current_N=length(unique(ParticipantID)))
## Condition NCategories Current_N
## 1 Dyad 1 32
## 2 Dyad 2 32
## 3 PseudoDyad 1 13
## 4 PseudoDyad 2 14
## 5 Single 1 16
## 6 Single 2 16
NB since the training is 50-50, there is no objective majority particle - this is based on the majority in their productions. If there are two equal-frequency particles, this will classify the alphabetically-first as the majority.
# For each participant, find their majority particle separately at each
# stage (training / recall / interaction) and attach it to their rows.
# Ties in which.max resolve to the first (alphabetically-first) particle.
output.data <- data.frame()
for (p in unique(all.data.for.proportion.analysis$ParticipantID)) {
this.participant <- subset(all.data.for.proportion.analysis, ParticipantID == p)
training.data.p <- subset(this.participant, Stage=='training' & Number == 2)
# Particles this participant was actually trained on; anything else
# they produce is treated as illegal and dropped.
legal.particles <- unique(training.data.p$TrainingParticle)
illegal.particles <- unique(this.participant[!this.participant$Particle %in% legal.particles,]$Particle)
#NB removing trials where the participant did not use a training particle
this.participant <- this.participant[!this.participant$Particle %in% illegal.particles,]
recall.data.p <- subset(this.participant, Stage=='recall1' & Number==2)
interaction.data.p <- subset(this.participant, Stage=='interactD' & Number==2)
# Most frequent particle per stage (training uses TrainingParticle).
Maj_Particle_Recall <- names(which.max(table(recall.data.p$Particle)))
Maj_Particle_Interaction <- names(which.max(table(interaction.data.p$Particle)))
Maj_Particle_Training <- names(which.max(table(training.data.p$TrainingParticle)))
# NOTE(review): "==" on the next line looks like a typo for "<-"; as
# written it is a no-op comparison, and MajorityParticle is instead
# created by the stage-wise assignments below, leaving rows from other
# stages NA rather than "NA". Harmless here because only training /
# recall1 / interactD rows are kept later — but confirm.
this.participant$MajorityParticle == "NA"
this.participant$MajorityParticle[this.participant$Stage == "recall1"] <- Maj_Particle_Recall
this.participant$MajorityParticle[this.participant$Stage == "interactD"] <- Maj_Particle_Interaction
this.participant$MajorityParticle[this.participant$Stage == "training"] <- Maj_Particle_Training
output.data <- rbind(output.data, this.participant)
}
Once the majority particle is identified, we can categorise each response as using that majority particle or not.
# Score each response: 1 if it uses that row's majority particle, 0 if
# not. Rows where either side is NA match neither comparison and stay NA.
output.data$MajorityParticleUse[output.data$Particle == output.data$MajorityParticle] <- 1
output.data$MajorityParticleUse[output.data$Particle != output.data$MajorityParticle] <- 0
Now identify the subsets of output.data we want - only training, recall, or director trials in interaction; only plural trials; only trials where the MajorityParticle is not NA.
# Keep only plural trials from training, recall, and director trials in
# interaction; then drop rows with no majority-particle score.
output.data <- subset(output.data, (Stage == "training" | Stage == "recall1" | Stage == "interactD") & Number == 2)
output.data <- output.data[!is.na(output.data$MajorityParticleUse),]
Reorder factor levels to reflect order in experiment.
# Order stages chronologically and give them display names.
output.data$Stage <- factor(output.data$Stage, levels=c("training","recall1", "interactD"))
output.data$Stage <- plyr::revalue(output.data$Stage,
c("training"="Training",
"recall1"="Recall",
"interactD"="Interaction"))
# Order conditions for plotting/contrasts and relabel NCategories.
output.data$Condition <- factor(output.data$Condition, levels=c("Single","PseudoDyad","Dyad"))
output.data$NCategories <- plyr::revalue(factor(output.data$NCategories),
c("1"="One Category",
"2"="Two Categories"))
Summarize the data for plotting - by-participant proportions.
# Per-participant proportion of majority-marker use at Recall and
# Interaction (Training excluded from the plot data).
proportion.data <- aggregate(MajorityParticleUse~Chain+ParticipantID+Condition+NCategories+Stage,data=subset(output.data, (Stage == "Recall" | Stage == "Interaction")),FUN=mean)
Alternative style - bigger points for mean, no grid lines.
# Dot plot of by-participant proportions, faceted by NCategories x
# Condition, with a diamond at each cell mean and bootstrapped CI error
# bars. NOTE(review): my.colours must be defined elsewhere in the full
# script — not visible in this chunk.
ggplot(data=proportion.data) +
facet_grid(NCategories~Condition) +
stat_summary(aes(x=Stage, y=MajorityParticleUse),geom='point', fun='mean', colour='black',fill='black',size=3, shape=23) +
# NOTE(review): fun.data='mean_cl_boot' together with fun.min/fun.max
# "min"/"max" looks redundant — confirm which one ggplot honours here.
stat_summary(aes(x=Stage, y=MajorityParticleUse),geom='errorbar', fun.data='mean_cl_boot',fun.min="min", fun.max="max",width=0.2) +
geom_dotplot(aes(x=Stage, y=MajorityParticleUse, fill=Condition),binaxis='y',stackdir="center", binwidth = .025, binpositions='all', dotsize=0.5, alpha=0.5) +
theme_bw() +
scale_fill_manual(values=my.colours) +
theme(legend.position = "none") +
theme(axis.title.x = element_blank()) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
ylab("Proportion of plurals marked with majority marker")
# Save the figure for the paper.
ggsave("Figures/proportions.pdf",width=8,height=6)
Stat: just looking at recall vs interaction.
Condition is Helmert-coded, so the contrasts are: Single vs PseudoDyads (Condition1 in the summary tables), and the mean of Single and PseudoDyads vs Dyads (Condition2 in the summary tables).
Stage and NCategories are sum-coded.
# Restrict to Recall vs Interaction and set up the contrasts described
# above: Helmert for Condition, negated sum coding for Stage and
# NCategories (so Interaction / Two Categories are coded +1).
output.data.for.analysis <- subset(output.data,Stage %in% c('Recall','Interaction'))
output.data.for.analysis$Stage <- droplevels(output.data.for.analysis$Stage)
contrasts(output.data.for.analysis$Condition) <- contr.helmert(3)
contrasts(output.data.for.analysis$NCategories) <- -contr.sum(2) #NB - so that Two Categories coded as 1
contrasts(output.data.for.analysis$Stage) <- -contr.sum(2) #NB - so that Interaction coded as 1
# Logistic mixed model: full factorial fixed effects; random intercepts
# and Stage slopes by chain and by participant-within-chain.
proportion.model <- glmer(MajorityParticleUse ~ Condition * Stage * NCategories + (1 + Stage | Chain/ParticipantID),
data=output.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
Model shows effects of:
condition2 (Dyads use majority particle more than the other two conditions)
stage (more use of the majority particle in interaction)
condition2 x stage (effect of stage is bigger in dyads)
[marginal] condition2 x n categories (the effect of being a dyad is smaller in the 2-category condition)
condition2 x stage x n categories (the effect of being a dyad in interaction is smaller in the 2-category condition)
# Fixed-effect estimates interpreted in the notes above.
summary(proportion.model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: MajorityParticleUse ~ Condition * Stage * NCategories + (1 +
## Stage | Chain/ParticipantID)
## Data: output.data.for.analysis
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 16833.5 16968.6 -8398.8 16797.5 13434
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7029 -1.1143 0.5642 0.8101 0.9739
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantID:Chain (Intercept) 0.13885 0.3726
## Stage1 0.04588 0.2142 0.07
## Chain (Intercept) 0.13252 0.3640
## Stage1 0.07740 0.2782 0.93
## Number of obs: 13452, groups: ParticipantID:Chain, 123; Chain, 91
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.632795 0.056697 11.161 < 2e-16 ***
## Condition1 0.024801 0.073196 0.339 0.734735
## Condition2 0.184111 0.037624 4.893 9.91e-07 ***
## Stage1 0.168250 0.041680 4.037 5.42e-05 ***
## NCategories1 -0.054489 0.056554 -0.963 0.335308
## Condition1:Stage1 0.006868 0.053162 0.129 0.897206
## Condition2:Stage1 0.096465 0.027932 3.454 0.000553 ***
## Condition1:NCategories1 0.013512 0.073195 0.185 0.853541
## Condition2:NCategories1 -0.068317 0.037583 -1.818 0.069101 .
## Stage1:NCategories1 -0.063571 0.041463 -1.533 0.125228
## Condition1:Stage1:NCategories1 0.001380 0.053157 0.026 0.979291
## Condition2:Stage1:NCategories1 -0.064668 0.027888 -2.319 0.020403 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cndtn1 Cndtn2 Stage1 NCtgr1 Cn1:S1 Cn2:S1 C1:NC1 C2:NC1
## Condition1 0.072
## Condition2 -0.171 -0.054
## Stage1 0.512 0.037 -0.007
## NCategoris1 -0.025 -0.019 -0.005 -0.025
## Cndtn1:Stg1 0.037 0.469 -0.029 0.069 -0.004
## Cndtn2:Stg1 -0.007 -0.028 0.564 -0.134 -0.013 -0.052
## Cndtn1:NCt1 -0.019 -0.018 0.015 -0.004 0.072 -0.013 0.004
## Cndtn2:NCt1 -0.007 0.015 -0.031 -0.016 -0.176 0.004 -0.036 -0.054
## Stg1:NCtgr1 -0.025 -0.004 -0.013 -0.033 0.511 -0.020 -0.019 0.037 -0.011
## Cnd1:S1:NC1 -0.004 -0.013 0.003 -0.021 0.038 -0.016 0.015 0.469 -0.028
## Cnd2:S1:NC1 -0.016 0.003 -0.036 -0.023 -0.011 0.016 -0.047 -0.028 0.564
## S1:NC1 C1:S1:
## Condition1
## Condition2
## Stage1
## NCategoris1
## Cndtn1:Stg1
## Cndtn2:Stg1
## Cndtn1:NCt1
## Cndtn2:NCt1
## Stg1:NCtgr1
## Cnd1:S1:NC1 0.070
## Cnd2:S1:NC1 -0.142 -0.052
Per condition analysis focussing on whether marker use changes between recall and interaction - running these on the full data, but using treatment coding to get the effect per condition.
# Build three copies of Condition, each re-leveled so a different
# condition is the reference; resetting contrasts to NULL restores the
# default (treatment) coding, replacing the Helmert coding set earlier.
output.data.for.analysis$ConditionSinglesRef <- relevel(output.data.for.analysis$Condition,ref="Single")
contrasts(output.data.for.analysis$ConditionSinglesRef) <- NULL
output.data.for.analysis$ConditionPseudosRef <- relevel(output.data.for.analysis$Condition,ref="PseudoDyad")
contrasts(output.data.for.analysis$ConditionPseudosRef) <- NULL
output.data.for.analysis$ConditionDyadsRef <- relevel(output.data.for.analysis$Condition,ref="Dyad")
contrasts(output.data.for.analysis$ConditionDyadsRef) <- NULL
Singles - no effect of Stage.
# Same model with Single as the reference level, so Stage1 gives the
# simple effect of stage within the Single condition.
proportion.model.s <- glmer(MajorityParticleUse ~ ConditionSinglesRef * Stage * NCategories + (1 + Stage | Chain/ParticipantID),
data=output.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
summary(proportion.model.s)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: MajorityParticleUse ~ ConditionSinglesRef * Stage * NCategories +
## (1 + Stage | Chain/ParticipantID)
## Data: output.data.for.analysis
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 16833.5 16968.6 -8398.8 16797.5 13434
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7029 -1.1143 0.5642 0.8101 0.9739
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantID:Chain (Intercept) 0.13885 0.3726
## Stage1 0.04588 0.2142 0.07
## Chain (Intercept) 0.13252 0.3640
## Stage1 0.07740 0.2782 0.93
## Number of obs: 13452, groups: ParticipantID:Chain, 123; Chain, 91
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) 0.4239102 0.0991008 4.278
## ConditionSinglesRefPseudoDyad 0.0495539 0.1464094 0.338
## ConditionSinglesRefDyad 0.5771027 0.1311321 4.401
## Stage1 0.0649241 0.0720706 0.901
## NCategories1 0.0003090 0.0990887 0.003
## ConditionSinglesRefPseudoDyad:Stage1 0.0137202 0.1063424 0.129
## ConditionSinglesRefDyad:Stage1 0.2962540 0.0968787 3.058
## ConditionSinglesRefPseudoDyad:NCategories1 0.0270434 0.1464074 0.185
## ConditionSinglesRefDyad:NCategories1 -0.1914348 0.1310142 -1.461
## Stage1:NCategories1 -0.0002815 0.0720487 -0.004
## ConditionSinglesRefPseudoDyad:Stage1:NCategories1 0.0027658 0.1063310 0.026
## ConditionSinglesRefDyad:Stage1:NCategories1 -0.1926287 0.0967570 -1.991
## Pr(>|z|)
## (Intercept) 1.89e-05 ***
## ConditionSinglesRefPseudoDyad 0.73502
## ConditionSinglesRefDyad 1.08e-05 ***
## Stage1 0.36767
## NCategories1 0.99751
## ConditionSinglesRefPseudoDyad:Stage1 0.89734
## ConditionSinglesRefDyad:Stage1 0.00223 **
## ConditionSinglesRefPseudoDyad:NCategories1 0.85345
## ConditionSinglesRefDyad:NCategories1 0.14397
## Stage1:NCategories1 0.99688
## ConditionSinglesRefPseudoDyad:Stage1:NCategories1 0.97925
## ConditionSinglesRefDyad:Stage1:NCategories1 0.04650 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CnSRPD CndSRD Stage1 NCtgr1 CnSRPD:S1 CnSRD:S1 CSRPD:N
## CndtnSngRPD -0.677
## CndtnSnglRD -0.754 0.511
## Stage1 0.463 -0.314 -0.348
## NCategoris1 0.005 -0.003 -0.004 -0.009
## CndtSRPD:S1 -0.314 0.469 0.237 -0.677 0.006
## CndtnSRD:S1 -0.343 0.233 0.536 -0.741 0.006 0.504
## CndSRPD:NC1 -0.003 -0.018 0.003 0.006 -0.677 -0.013 -0.004
## CndtSRD:NC1 -0.005 0.003 -0.014 0.005 -0.756 -0.004 -0.027 0.512
## Stg1:NCtgr1 -0.009 0.006 0.007 0.009 0.463 -0.006 -0.007 -0.314
## CSRPD:S1:NC 0.006 -0.013 -0.004 -0.006 -0.314 -0.016 0.005 0.469
## CSRD:S1:NC1 0.005 -0.004 -0.028 -0.009 -0.345 0.005 -0.025 0.233
## CSRD:N S1:NC1 CSRPD:S1:
## CndtnSngRPD
## CndtnSnglRD
## Stage1
## NCategoris1
## CndtSRPD:S1
## CndtnSRD:S1
## CndSRPD:NC1
## CndtSRD:NC1
## Stg1:NCtgr1 -0.350
## CSRPD:S1:NC 0.237 -0.678
## CSRD:S1:NC1 0.536 -0.745 0.504
Pseudodyads - no effect of Stage.
# Same model with PseudoDyad as the reference level, so Stage1 gives the
# simple effect of stage within the PseudoDyad condition.
proportion.model.p <- glmer(MajorityParticleUse ~ ConditionPseudosRef * Stage * NCategories + (1 + Stage | Chain/ParticipantID),
data=output.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
summary(proportion.model.p)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: MajorityParticleUse ~ ConditionPseudosRef * Stage * NCategories +
## (1 + Stage | Chain/ParticipantID)
## Data: output.data.for.analysis
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 16833.5 16968.6 -8398.8 16797.5 13434
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7029 -1.1143 0.5642 0.8101 0.9739
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantID:Chain (Intercept) 0.13885 0.3726
## Stage1 0.04588 0.2142 0.07
## Chain (Intercept) 0.13252 0.3640
## Stage1 0.07740 0.2782 0.93
## Number of obs: 13452, groups: ParticipantID:Chain, 123; Chain, 91
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) 0.473467 0.107783 4.393
## ConditionPseudosRefSingle -0.049576 0.146425 -0.339
## ConditionPseudosRefDyad 0.527551 0.137823 3.828
## Stage1 0.078645 0.078205 1.006
## NCategories1 0.027358 0.107766 0.254
## ConditionPseudosRefSingle:Stage1 -0.013721 0.106324 -0.129
## ConditionPseudosRefDyad:Stage1 0.282532 0.101530 2.783
## ConditionPseudosRefSingle:NCategories1 -0.027049 0.146422 -0.185
## ConditionPseudosRefDyad:NCategories1 -0.218483 0.137732 -1.586
## Stage1:NCategories1 0.002499 0.078178 0.032
## ConditionPseudosRefSingle:Stage1:NCategories1 -0.002791 0.106312 -0.026
## ConditionPseudosRefDyad:Stage1:NCategories1 -0.195407 0.101420 -1.927
## Pr(>|z|)
## (Intercept) 1.12e-05 ***
## ConditionPseudosRefSingle 0.734928
## ConditionPseudosRefDyad 0.000129 ***
## Stage1 0.314599
## NCategories1 0.799603
## ConditionPseudosRefSingle:Stage1 0.897321
## ConditionPseudosRefDyad:Stage1 0.005390 **
## ConditionPseudosRefSingle:NCategories1 0.853440
## ConditionPseudosRefDyad:NCategories1 0.112674
## Stage1:NCategories1 0.974498
## ConditionPseudosRefSingle:Stage1:NCategories1 0.979054
## ConditionPseudosRefDyad:Stage1:NCategories1 0.054017 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CndPRS CndPRD Stage1 NCtgr1 CnPRS:S1 CnPRD:S1 CPRS:N CPRD:N
## CndtnPsdsRS -0.736
## CndtnPsdsRD -0.781 0.576
## Stage1 0.473 -0.348 -0.369
## NCategoris1 -0.037 0.027 0.029 -0.017
## CndtnPRS:S1 -0.348 0.469 0.273 -0.735 0.012
## CndtnPRD:S1 -0.364 0.269 0.537 -0.768 0.013 0.566
## CndtPRS:NC1 0.027 -0.018 -0.022 0.012 -0.736 -0.013 -0.010
## CndtPRD:NC1 0.029 -0.022 -0.038 0.012 -0.782 -0.010 -0.031 0.576
## Stg1:NCtgr1 -0.017 0.012 0.013 -0.037 0.474 0.027 0.028 -0.349 -0.371
## CPRS:S1:NC1 0.012 -0.013 -0.010 0.027 -0.348 -0.016 -0.021 0.469 0.273
## CPRD:S1:NC1 0.012 -0.010 -0.031 0.027 -0.365 -0.021 -0.050 0.269 0.536
## S1:NC1 CPRS:S1:
## CndtnPsdsRS
## CndtnPsdsRD
## Stage1
## NCategoris1
## CndtnPRS:S1
## CndtnPRD:S1
## CndtPRS:NC1
## CndtPRD:NC1
## Stg1:NCtgr1
## CPRS:S1:NC1 -0.735
## CPRD:S1:NC1 -0.771 0.567
Dyads - clear effect of Stage.
# Same model with Dyad as the reference level, so Stage1 gives the
# simple effect of stage within the Dyad condition.
proportion.model.d <- glmer(MajorityParticleUse ~ ConditionDyadsRef * Stage * NCategories + (1 + Stage | Chain/ParticipantID),
data=output.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
summary(proportion.model.d)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: MajorityParticleUse ~ ConditionDyadsRef * Stage * NCategories +
## (1 + Stage | Chain/ParticipantID)
## Data: output.data.for.analysis
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 16833.5 16968.6 -8398.8 16797.5 13434
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7029 -1.1143 0.5642 0.8101 0.9739
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantID:Chain (Intercept) 0.13886 0.3726
## Stage1 0.04588 0.2142 0.07
## Chain (Intercept) 0.13252 0.3640
## Stage1 0.07741 0.2782 0.93
## Number of obs: 13452, groups: ParticipantID:Chain, 123; Chain, 91
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) 1.00102 0.08610 11.627
## ConditionDyadsRefSingle -0.57713 0.13112 -4.401
## ConditionDyadsRefPseudoDyad -0.52754 0.13779 -3.828
## Stage1 0.36118 0.06506 5.551
## NCategories1 -0.19112 0.08574 -2.229
## ConditionDyadsRefSingle:Stage1 -0.29626 0.09686 -3.059
## ConditionDyadsRefPseudoDyad:Stage1 -0.28253 0.10153 -2.783
## ConditionDyadsRefSingle:NCategories1 0.19142 0.13100 1.461
## ConditionDyadsRefPseudoDyad:NCategories1 0.21846 0.13770 1.586
## Stage1:NCategories1 -0.19290 0.06458 -2.987
## ConditionDyadsRefSingle:Stage1:NCategories1 0.19261 0.09674 1.991
## ConditionDyadsRefPseudoDyad:Stage1:NCategories1 0.19538 0.10142 1.927
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## ConditionDyadsRefSingle 1.08e-05 ***
## ConditionDyadsRefPseudoDyad 0.000129 ***
## Stage1 2.83e-08 ***
## NCategories1 0.025811 *
## ConditionDyadsRefSingle:Stage1 0.002224 **
## ConditionDyadsRefPseudoDyad:Stage1 0.005388 **
## ConditionDyadsRefSingle:NCategories1 0.143957
## ConditionDyadsRefPseudoDyad:NCategories1 0.112638
## Stage1:NCategories1 0.002816 **
## ConditionDyadsRefSingle:Stage1:NCategories1 0.046478 *
## ConditionDyadsRefPseudoDyad:Stage1:NCategories1 0.054037 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CndDRS CnDRPD Stage1 NCtgr1 CnDRS:S1 CnDRPD:S1 CDRS:N
## CndtnDydsRS -0.655
## CndtnDydRPD -0.623 0.408
## Stage1 0.631 -0.413 -0.393
## NCategoris1 -0.041 0.026 0.025 -0.053
## CndtnDRS:S1 -0.422 0.536 0.263 -0.668 0.034
## CndtDRPD:S1 -0.403 0.264 0.537 -0.638 0.033 0.426
## CndtDRS:NC1 0.027 -0.014 -0.016 0.035 -0.654 -0.027 -0.021
## CndDRPD:NC1 0.026 -0.016 -0.038 0.034 -0.623 -0.021 -0.031 0.407
## Stg1:NCtgr1 -0.054 0.034 0.032 -0.072 0.631 0.046 0.044 -0.412
## CDRS:S1:NC1 0.036 -0.028 -0.022 0.048 -0.421 -0.025 -0.030 0.536
## CDRPD:S1:NC 0.034 -0.022 -0.031 0.045 -0.402 -0.029 -0.050 0.263
## CDRPD:N S1:NC1 CDRS:S1:
## CndtnDydsRS
## CndtnDydRPD
## Stage1
## NCategoris1
## CndtnDRS:S1
## CndtDRPD:S1
## CndtDRS:NC1
## CndDRPD:NC1
## Stg1:NCtgr1 -0.393
## CDRS:S1:NC1 0.262 -0.667
## CDRPD:S1:NC 0.536 -0.637 0.425
For the dyads, is the effect of stage significant even if you just look at the 2-category guys? The models above suggest the effect is about half as big as in the 1-category condition. We can check by using treatment coding on NCategories too, with Two Categories set as the reference level. This model shows a marginal effect (p=.057) of Stage, i.e. the 2-category dyads are probably more regular at interaction.
# Re-reference NCategories to "Two Categories" (default treatment
# coding), so Stage1 gives the simple effect of stage for 2-category
# dyads when combined with the Dyad-reference Condition.
output.data.for.analysis$NCategories2Ref <- relevel(output.data.for.analysis$NCategories,ref="Two Categories")
contrasts(output.data.for.analysis$NCategories2Ref) <- NULL
proportion.model.d.2 <- glmer(MajorityParticleUse ~ ConditionDyadsRef * Stage * NCategories2Ref + (1 + Stage | Chain/ParticipantID),
data=output.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
summary(proportion.model.d.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: MajorityParticleUse ~ ConditionDyadsRef * Stage * NCategories2Ref +
## (1 + Stage | Chain/ParticipantID)
## Data: output.data.for.analysis
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 16833.5 16968.6 -8398.8 16797.5 13434
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7029 -1.1143 0.5642 0.8101 0.9739
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantID:Chain (Intercept) 0.13885 0.3726
## Stage1 0.04588 0.2142 0.07
## Chain (Intercept) 0.13252 0.3640
## Stage1 0.07740 0.2782 0.93
## Number of obs: 13452, groups: ParticipantID:Chain, 123; Chain, 91
##
## Fixed effects:
## Estimate
## (Intercept) 0.80989
## ConditionDyadsRefSingle -0.38570
## ConditionDyadsRefPseudoDyad -0.30906
## Stage1 0.16827
## NCategories2RefOne Category 0.38225
## ConditionDyadsRefSingle:Stage1 -0.10364
## ConditionDyadsRefPseudoDyad:Stage1 -0.08714
## ConditionDyadsRefSingle:NCategories2RefOne Category -0.38286
## ConditionDyadsRefPseudoDyad:NCategories2RefOne Category -0.43693
## Stage1:NCategories2RefOne Category 0.38581
## ConditionDyadsRefSingle:Stage1:NCategories2RefOne Category -0.38524
## ConditionDyadsRefPseudoDyad:Stage1:NCategories2RefOne Category -0.39077
## Std. Error
## (Intercept) 0.11887
## ConditionDyadsRefSingle 0.18398
## ConditionDyadsRefPseudoDyad 0.19081
## Stage1 0.08827
## NCategories2RefOne Category 0.17136
## ConditionDyadsRefSingle:Stage1 0.13515
## ConditionDyadsRefPseudoDyad:Stage1 0.13974
## ConditionDyadsRefSingle:NCategories2RefOne Category 0.26199
## ConditionDyadsRefPseudoDyad:NCategories2RefOne Category 0.27509
## Stage1:NCategories2RefOne Category 0.12909
## ConditionDyadsRefSingle:Stage1:NCategories2RefOne Category 0.19350
## ConditionDyadsRefPseudoDyad:Stage1:NCategories2RefOne Category 0.20264
## z value Pr(>|z|)
## (Intercept) 6.813 9.55e-12
## ConditionDyadsRefSingle -2.096 0.0360
## ConditionDyadsRefPseudoDyad -1.620 0.1053
## Stage1 1.906 0.0566
## NCategories2RefOne Category 2.231 0.0257
## ConditionDyadsRefSingle:Stage1 -0.767 0.4431
## ConditionDyadsRefPseudoDyad:Stage1 -0.624 0.5329
## ConditionDyadsRefSingle:NCategories2RefOne Category -1.461 0.1439
## ConditionDyadsRefPseudoDyad:NCategories2RefOne Category -1.588 0.1122
## Stage1:NCategories2RefOne Category 2.989 0.0028
## ConditionDyadsRefSingle:Stage1:NCategories2RefOne Category -1.991 0.0465
## ConditionDyadsRefPseudoDyad:Stage1:NCategories2RefOne Category -1.928 0.0538
##
## (Intercept) ***
## ConditionDyadsRefSingle *
## ConditionDyadsRefPseudoDyad
## Stage1 .
## NCategories2RefOne Category *
## ConditionDyadsRefSingle:Stage1
## ConditionDyadsRefPseudoDyad:Stage1
## ConditionDyadsRefSingle:NCategories2RefOne Category
## ConditionDyadsRefPseudoDyad:NCategories2RefOne Category
## Stage1:NCategories2RefOne Category **
## ConditionDyadsRefSingle:Stage1:NCategories2RefOne Category *
## ConditionDyadsRefPseudoDyad:Stage1:NCategories2RefOne Category .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CndDRS CnDRPD Stage1 NC2ROC CDRS:S CDRPD: CDRS:NC CDRPD:NC
## CndtnDydsRS -0.646
## CndtnDydRPD -0.622 0.402
## Stage1 0.612 -0.395 -0.380
## NCtgrs2RfOC -0.690 0.447 0.430 -0.421
## CndtnDRS:S1 -0.399 0.519 0.248 -0.653 0.276
## CndtDRPD:S1 -0.386 0.249 0.528 -0.630 0.266 0.412
## CDRS:NC2ROC 0.452 -0.702 -0.281 0.275 -0.654 -0.364 -0.174
## CDRPD:NC2RC 0.429 -0.278 -0.693 0.262 -0.622 -0.172 -0.365 0.407
## Stg1:NC2ROC -0.415 0.269 0.259 -0.678 0.630 0.445 0.429 -0.412 -0.392
## CDRS:S1:NCC 0.277 -0.362 -0.173 0.453 -0.421 -0.698 -0.286 0.536 0.262
## CDRPD:S1:NC 0.264 -0.171 -0.363 0.432 -0.401 -0.283 -0.688 0.262 0.535
## S1:NCC CDRS:SC
## CndtnDydsRS
## CndtnDydRPD
## Stage1
## NCtgrs2RfOC
## CndtnDRS:S1
## CndtDRPD:S1
## CDRS:NC2ROC
## CDRPD:NC2RC
## Stg1:NC2ROC
## CDRS:S1:NCC -0.667
## CDRPD:S1:NC -0.637 0.425
For Singles and Pseudodyads this can be read off from the proportion plot since their partner sticks at 50-50 marker use - but for genuine dyads, it is useful to know if they are using the same marker as their partner. For this we can calculate a per-pair (rather than per-individual) majority marker, express each individual in terms of how they use that marker, then calculate within-pair differences.
# Same majority-particle computation as the per-participant loop above,
# but at the level of dyad pairs (Chain), so both pair members are
# scored against a shared pair-level majority particle.
convergence.data <- data.frame()
for (c in unique(subset(all.data.for.proportion.analysis,Condition=="Dyad")$Chain)) {
this.chain <- subset(all.data.for.proportion.analysis, Chain == c)
training.data.c <- subset(this.chain, Stage=='training' & Number == 2)
# Particles this pair was trained on; anything else is dropped.
legal.particles <- unique(training.data.c$TrainingParticle)
illegal.particles <- unique(this.chain[!this.chain$Particle %in% legal.particles,]$Particle)
#NB removing trials where the participant did not use a training particle
this.chain <- this.chain[!this.chain$Particle %in% illegal.particles,]
recall.data.c <- subset(this.chain, Stage=='recall1' & Number==2)
interaction.data.c <- subset(this.chain, Stage=='interactD' & Number==2)
# Most frequent particle per stage across the whole pair.
Maj_Particle_Recall <- names(which.max(table(recall.data.c$Particle)))
Maj_Particle_Interaction <- names(which.max(table(interaction.data.c$Particle)))
Maj_Particle_Training <- names(which.max(table(training.data.c$TrainingParticle)))
# NOTE(review): "==" on the next line looks like a typo for "<-"; as
# written it is a no-op comparison, and MajorityParticle is instead
# created by the stage-wise assignments below, leaving rows from other
# stages NA rather than "NA". Harmless here because only training /
# recall1 / interactD rows are kept below — but confirm.
this.chain$MajorityParticle == "NA"
this.chain$MajorityParticle[this.chain$Stage == "recall1"] <- Maj_Particle_Recall
this.chain$MajorityParticle[this.chain$Stage == "interactD"] <- Maj_Particle_Interaction
this.chain$MajorityParticle[this.chain$Stage == "training"] <- Maj_Particle_Training
convergence.data <- rbind(convergence.data, this.chain)
}
convergence.data$MajorityParticleUse[convergence.data$Particle == convergence.data$MajorityParticle] <- 1
convergence.data$MajorityParticleUse[convergence.data$Particle != convergence.data$MajorityParticle] <- 0
convergence.data <- subset(convergence.data, (Stage == "training" | Stage == "recall1" | Stage == "interactD") & Number == 2)
convergence.data <- convergence.data[!is.na(convergence.data$MajorityParticleUse),]
convergence.data$Stage <- factor(convergence.data$Stage, levels=c("training","recall1", "interactD"))
convergence.data$Condition <- factor(convergence.data$Condition, levels=c("Single", "Dyad","PseudoDyad"))
convergence.data$NCategories <- plyr::revalue(factor(convergence.data$NCategories),
c("1"="One Category",
"2"="Two Categories"))
convergence.data$Stage <- plyr::revalue(convergence.data$Stage,
c("training"="Training",
"recall1"="Recall",
"interactD"="Interaction"))
Now calculate per-participant proportions of using that majority marker.
# Per-participant proportion of trials using the pair-level majority marker,
# computed over the Recall and Interaction stages only.
pair.maj.proportion.data <- aggregate(
  MajorityParticleUse ~ Chain + ParticipantID + Condition + NCategories + Stage,
  data = subset(convergence.data, Stage %in% c("Recall", "Interaction")),
  FUN = mean)
Plot this - anyone using a different majority marker from their partner will show up under 0.5. Key thing here is that nearly everyone is above 0.5 (i.e. aligned on which marker is the majority), and the regularisers in the One Category condition are all at 1, i.e. they have regularised on the same marker as their partner.
# Dot plot: per-participant use of the pair majority marker, faceted by
# NCategories x Condition. Diamonds = condition means, error bars = bootstrap CIs.
# NOTE(review): the errorbar layer supplies fun.data together with
# fun.min/fun.max - confirm which of the two ggplot2 actually honours here.
ggplot(data = pair.maj.proportion.data) +
  facet_grid(NCategories ~ Condition) +
  stat_summary(aes(x = Stage, y = MajorityParticleUse), geom = 'point',
               fun = 'mean', colour = 'black', fill = 'black', size = 3, shape = 23) +
  stat_summary(aes(x = Stage, y = MajorityParticleUse), geom = 'errorbar',
               fun.data = 'mean_cl_boot', fun.min = "min", fun.max = "max", width = 0.2) +
  geom_dotplot(aes(x = Stage, y = MajorityParticleUse, fill = Condition),
               binaxis = 'y', stackdir = "center", binwidth = .025,
               binpositions = 'all', dotsize = 0.5, alpha = 0.5) +
  scale_fill_manual(values = my.colours[3]) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  ylab("Proportion of plurals marked with pair majority marker")
Now calculate within-pair difference and plot.
# Within-pair spread in majority-marker use: per chain/stage, the gap between
# the two partners' proportions (max - min, via diff of the range).
pair.maj.difference.data <- aggregate(
  MajorityParticleUse ~ Chain + Condition + NCategories + Stage,
  data = pair.maj.proportion.data,
  FUN = function(v) diff(range(v)))
# Dot plot of the within-pair difference in majority-marker use.
# NOTE(review): the errorbar layer supplies fun.data together with
# fun.min/fun.max - confirm which of the two ggplot2 actually honours here.
ggplot(data = pair.maj.difference.data) +
  facet_grid(NCategories ~ Condition) +
  stat_summary(aes(x = Stage, y = MajorityParticleUse), geom = 'point',
               fun = 'mean', colour = 'black', fill = 'black', size = 3, shape = 23) +
  stat_summary(aes(x = Stage, y = MajorityParticleUse), geom = 'errorbar',
               fun.data = 'mean_cl_boot', fun.min = "min", fun.max = "max", width = 0.2) +
  geom_dotplot(aes(x = Stage, y = MajorityParticleUse, fill = Condition),
               binaxis = 'y', stackdir = "center", binwidth = .025,
               binpositions = 'all', dotsize = 0.5, alpha = 0.5) +
  scale_fill_manual(values = my.colours[3]) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  ylab("Within-pair difference in proportion\nof plurals marked with pair majority marker")
General functions for entropy (using the entropy library where possible).
# particle.data is a vector of raw observations of any kind (e.g. c(1,2,3,1,1),
# or a vector of characters) - NB it is *not* counts; counts are tabulated here.
# Returns the maximum-likelihood entropy of the sample in bits.
particle.entropy <- function(particle.data) {
  counts <- as.vector(table(particle.data))
  entropy::entropy(counts, method = "ML", unit = "log2")
}
# particle.data is a two-column data frame: one column the conditioning factor,
# the other the particles, e.g.
#   Category Particle
#   Animal   bup
#   Animal   bup
#   Vehicle  dax
#   Vehicle  dax
# Returns the empirical mutual information (bits) of the joint count table.
particle.mi <- function(particle.data) {
  joint.counts <- table(particle.data)
  entropy::mi.empirical(joint.counts, unit = "log2")
}
# Conditional entropy is not built into the entropy library, but since
#   MI(X,Y) = H(X) - H(X|Y)
# we can rearrange to
#   H(X|Y) = H(X) - MI(X,Y)
# where X is the Particle column and Y the conditioning column.
particle.conditional.entropy <- function(particle.data) {
  particle.entropy(particle.data$Particle) - particle.mi(particle.data)
}
# Entropy (bits) of a two-outcome sample: x hits out of n trials, i.e. the
# ML entropy of the proportion pair (x/n, (n-x)/n).
binom.entropy <- function(x, n) {
  entropy::entropy(c(x, n - x) / n, method = "ML", unit = "log2")
}
reassign.labels randomises marker assignments with
respect to either class or lexical conditioning/Mutual Information,
while leaving the other level of conditioning unaltered - this allows us
to generate randomisations that have e.g. the same level of lexical
conditioning but which differ in class conditioning, allowing us to use
those randomisations to assess whether the observed level of class
conditioning is surprising given the amount of lexical
conditioning.
Scrambling lexical conditioning while preserving class conditioning is easy - to generate a randomisation we simply randomise the particle assignment to nouns within each class, which leaves the distribution within each class (and therefore the class-based conditioning) unaltered.
Scrambling class conditioning while preserving lexical conditioning unaltered is slightly more complex, and involves shuffling the assignment of nouns to classes - e.g. we might reassign “dog” to the vehicle class and “car” to the animal class. This potentially changes the distribution of markers within each class, but leaves the distribution of markers per noun unaltered.
#helper function - randomises the assignment of particles/categories for the
#Monte Carlo tests.
#if type = class_ce or class_mi then nouns are shuffled across categories, i.e.
#this generates randomisations with the same lexical ce/mi as the original
#if type = lexical_ce or lexical_mi then markers are shuffled within categories,
#i.e. this generates randomisations with the same class ce/mi as the original
#this.p.data: data frame with columns Category, Noun, Particle
#returns a data frame with those same three columns (row/column order may differ)
reassign.labels <- function(this.p.data,type){
  if (type=="class_ce" || type=="class_mi") {
    #re-assign the category each noun belongs to, leaving each noun's
    #particle assignments intact
    current.category.assignments <- unique(this.p.data[,c("Category","Noun")])
    current.category.assignments.shuffled <- data.frame("Noun"=current.category.assignments$Noun,"Category"=sample(current.category.assignments$Category))
    #apply new category labels by merging based on Noun
    merge(this.p.data[,c("Noun","Particle")],current.category.assignments.shuffled,by="Noun")
  }
  else if (type=="lexical_ce" || type=="lexical_mi") {
    #shuffle particles within the A and V categories separately
    #NOTE(review): assumes the only categories are "A" and "V" - rows with any
    #other Category value would be silently dropped; confirm this holds upstream
    this.p.data.A <- subset(this.p.data,Category=="A")
    this.p.data.V <- subset(this.p.data,Category=="V")
    this.p.data.A.shuffle <- cbind(this.p.data.A[,c("Category","Noun")],sample(this.p.data.A$Particle))
    colnames(this.p.data.A.shuffle)<-c("Category","Noun","Particle")
    this.p.data.V.shuffle <- cbind(this.p.data.V[,c("Category","Noun")],sample(this.p.data.V$Particle))
    colnames(this.p.data.V.shuffle)<-c("Category","Noun","Particle")
    rbind(this.p.data.A.shuffle,this.p.data.V.shuffle)
  }
  else {
    #BUG FIX: previously an unrecognised type fell through and returned NULL
    #invisibly; fail fast with a clear message instead
    stop("reassign.labels: unknown type '", type, "'")
  }
}
#helper function - dispatches to the appropriate entropy measure.
#this.data: data frame with columns Category, Noun, Particle
#type: one of "class_ce", "lexical_ce", "class_mi", "lexical_mi"
#returns conditional entropy (ce types) or mutual information (mi types) of
#Particle given Category or Noun
calculate.entropy <- function(this.data,type) {
  if (type=="class_ce") {
    particle.conditional.entropy(this.data[,c("Category","Particle")])
  }
  else if (type=="lexical_ce") {
    particle.conditional.entropy(this.data[,c("Noun","Particle")])
  }
  else if (type=="class_mi") {
    particle.mi(this.data[,c("Category","Particle")])
  }
  else if (type=="lexical_mi") {
    particle.mi(this.data[,c("Noun","Particle")])
  }
  else {
    #BUG FIX: previously an unknown type left the local `entropy` undefined,
    #producing the confusing error "object 'entropy' not found"; fail fast
    stop("calculate.entropy: unknown type '", type, "'")
  }
}
Monte Carlo tests. monte returns four values: veridical, the actual entropy of the sample; mean, the mean of the mc distribution, which gives an estimate of the chance level of expected conditional entropy; p_lower, the probability of observing mc values <= the veridical entropy; and p_higher, the probability of observing mc values >= the veridical entropy (NB we need both since we might want to do this with MI, where high (rather than low) values are surprising).
#Monte Carlo test for one participant's (or pair's) data.
#Returns: veridical = actual entropy/MI of the sample; mean = mean of the MC
#distribution (estimate of chance level); p_lower / p_higher = proportion of
#MC samples <= / >= the veridical value (both needed: for conditional entropy
#low values are surprising, for MI high values are).
#NOTE(review): mc.trials is read from the global environment - it must be set
#before calling (consider making it an argument with a default).
monte <- function(this.p.data,type) {
  veridical.entropy <- calculate.entropy(this.p.data,type)
  msample <- replicate(mc.trials,calculate.entropy(reassign.labels(this.p.data,type),type))
  #vectorised counts replace the original element-by-element counting loop
  list(veridical = veridical.entropy,
       mean = mean(msample),
       p_lower = sum(msample <= veridical.entropy)/mc.trials,
       p_higher = sum(msample >= veridical.entropy)/mc.trials)
}
#similar idea, but this handles mc stuff with total entropy
#we want to know, given a participant who produces n data points, what is the expected entropy if they produce det1 with probability p1?
#and how many of those random samples would have same or lower entropy than the veridical?
#Returns: mean = mean entropy of the MC samples; p = proportion of MC samples
#with entropy <= veridical.entropy.
total.entropy.monte <- function(n,p1,veridical.entropy,trials) {
  msample <- replicate(trials,binom.entropy(rbinom(1,n,p1),n))
  #vectorised count replaces the original loop; this also drops the original's
  #use of `c` as a variable name, which masked base::c
  list(mean=mean(msample),p=sum(msample <= veridical.entropy)/trials)
}
Calculate entropy values for the data.
# Compute entropy/MI measures for every participant and pair at Recall and
# Interaction, in long format (one row per Measure per participant).
# Rows are accumulated in a list and bound once at the end - this replaces the
# original rbind-inside-the-loop pattern, which copies the growing data frame
# on every iteration (quadratic cost).
# NOTE(review): suppressWarnings() wraps the entire loop - consider narrowing
# it to the specific call that warns so new warnings are not silently hidden.
entropy.list <- list()
suppressWarnings(for (this.condition in c("Dyad","PseudoDyad","Single")) {
  for (this.stage in c("Recall","Interaction")) {
    this.data <- subset(output.data, Condition==this.condition & Stage==this.stage)
    for (this.chain in levels(droplevels(this.data$Chain))) {
      # seq_len also behaves correctly if the max generation were ever 0
      for (this.generation in seq_len(max(subset(this.data, Chain==this.chain)$Generation))) {
        this.generation.data <- subset(this.data,Chain==this.chain & Generation==this.generation)
        for (this.participant in levels(droplevels(this.generation.data$ParticipantID))) {
          #print(this.participant)
          # NOTE(review): assumes NCategories is constant within a generation -
          # unique() would return a length>1 vector otherwise
          this.n.categories <- unique(this.generation.data$NCategories)
          this.p.data <- subset(this.generation.data,ParticipantID==this.participant)
          # individual-level and pair-level total entropies
          total.entropy <- particle.entropy(this.p.data$Particle)
          total.entropy.chain <- particle.entropy(this.generation.data$Particle)
          #only want MI here
          #conditional.entropy.class.mc <- monte(this.p.data,"class_ce")
          #conditional.entropy.class.chain.mc <- monte(this.generation.data,"class_ce")
          #conditional.entropy.lexical.mc <- monte(this.p.data,"lexical_ce")
          #conditional.entropy.lexical.chain.mc <- monte(this.generation.data,"lexical_ce")
          mutual.information.class.mc <- monte(this.p.data,"class_mi")
          mutual.information.class.chain.mc <- monte(this.generation.data,"class_mi")
          mutual.information.lexical.mc <- monte(this.p.data,"lexical_mi")
          mutual.information.lexical.chain.mc <- monte(this.generation.data,"lexical_mi")
          # One row per measure; the Measure and Entropy vectors below must
          # stay in matching order.
          this.entropy.data <- data.frame(
            "Condition"=this.condition,
            "NCategories"=this.n.categories,
            "Chain"=this.chain,
            "Generation"=this.generation,
            "Participant"=this.participant,
            "Stage"=this.stage,
            "Measure"=c(#individual-based measures
              "IndividualTotalEntropy",
              #"IndividualClassConditionalEntropy",
              #"IndividualLexicalConditionalEntropy",
              "IndividualMutualInformationClass",
              "IndividualMutualInformationLexical",
              #pair-based measures
              "PairTotalEntropy",
              #"PairClassConditionalEntropy",
              #"PairLexicalConditionalEntropy",
              "PairMutualInformationClass",
              "PairMutualInformationLexical",
              #measures derived from the MC shuffled sample
              #"ChanceIndividualClassConditionalEntropy",
              #"PLowerIndividualClassConditionalEntropy",
              #"ChanceIndividualLexicalConditionalEntropy",
              #"PLowerIndividualLexicalConditionalEntropy",
              "ChanceIndividualMutualInformationClass",
              "PHigherIndividualMutualInformationClass",
              "ChanceIndividualMutualInformationLexical",
              "PHigherIndividualMutualInformationLexical",
              "PHigherPairMutualInformationClass"),
            "Entropy"=c(#individual-based measures
              total.entropy,# "IndividualTotalEntropy",
              #conditional.entropy.class.mc$veridical, #"IndividualClassConditionalEntropy",
              #conditional.entropy.lexical.mc$veridical, #"IndividualLexicalConditionalEntropy",
              mutual.information.class.mc$veridical, #"IndividualMutualInformationClass",
              mutual.information.lexical.mc$veridical, #"IndividualMutualInformationLexical",
              #pair-based measures
              total.entropy.chain, #"PairTotalEntropy",
              #conditional.entropy.class.chain.mc$veridical, #"PairClassConditionalEntropy",
              #conditional.entropy.lexical.chain.mc$veridical, #"PairLexicalConditionalEntropy",
              mutual.information.class.chain.mc$veridical, #"PairMutualInformationClass",
              mutual.information.lexical.chain.mc$veridical, #"PairMutualInformationLexical",
              #measures derived from the MC shuffled sample
              #conditional.entropy.class.mc$mean, #"ChanceIndividualClassConditionalEntropy",
              #conditional.entropy.class.mc$p_lower, #"PLowerIndividualClassConditionalEntropy",
              #conditional.entropy.lexical.mc$mean, #"ChanceIndividualLexicalConditionalEntropy",
              #conditional.entropy.lexical.mc$p_lower, #"PLowerIndividualLexicalConditionalEntropy",
              mutual.information.class.mc$mean, #"ChanceIndividualMutualInformationClass",
              mutual.information.class.mc$p_higher, #"PHigherIndividualMutualInformationClass",
              mutual.information.lexical.mc$mean, #"ChanceIndividualMutualInformationLexical",
              mutual.information.lexical.mc$p_higher, #"PHigherIndividualMutualInformationLexical"
              #I also want this one
              mutual.information.class.chain.mc$p_higher #"PHigherPairMutualInformationClass",
            ))
          entropy.list[[length(entropy.list) + 1]] <- this.entropy.data
        }
      }
    }
  }
})
# Bind all accumulated rows in a single pass
entropy.data <- do.call(rbind, entropy.list)
# Order factor levels for plotting and for the contrasts set later
entropy.data$Condition <- factor(entropy.data$Condition, levels=c("Single","PseudoDyad","Dyad"))
entropy.data$Stage <- factor(entropy.data$Stage, levels=c("Recall","Interaction"))
Plot total entropy - no point running a stat on this since it’s the same as the proportion data, just transformed.
# Dot plot of per-individual total entropy of productions.
# NOTE(review): the errorbar layer supplies fun.data together with
# fun.min/fun.max - confirm which of the two ggplot2 actually honours here.
ggplot(data = subset(entropy.data, Measure == "IndividualTotalEntropy")) +
  facet_grid(NCategories ~ Condition) +
  stat_summary(aes(x = Stage, y = Entropy), geom = 'point',
               fun = 'mean', colour = 'black', fill = 'black', size = 3, shape = 23) +
  stat_summary(aes(x = Stage, y = Entropy), geom = 'errorbar',
               fun.data = 'mean_cl_boot', fun.min = "min", fun.max = "max", width = 0.2) +
  geom_dotplot(aes(x = Stage, y = Entropy, fill = Condition),
               binaxis = 'y', stackdir = "center", binwidth = .025,
               binpositions = 'all', dotsize = 0.5, alpha = 0.5) +
  scale_fill_manual(values = my.colours) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  ylab("Entropy of individual productions")
Plot class MI (for two-category conditions only), to look for class-based conditioning.
# Dot plot of per-individual class MI (two-category conditions only).
# NOTE(review): the errorbar layer supplies fun.data together with
# fun.min/fun.max - confirm which of the two ggplot2 actually honours here.
ggplot(data = subset(entropy.data, Measure == "IndividualMutualInformationClass" & NCategories == "Two Categories")) +
  facet_grid(NCategories ~ Condition) +
  stat_summary(aes(x = Stage, y = Entropy), geom = 'point',
               fun = 'mean', colour = 'black', fill = 'black', size = 3, shape = 23) +
  stat_summary(aes(x = Stage, y = Entropy), geom = 'errorbar',
               fun.data = 'mean_cl_boot', fun.min = "min", fun.max = "max", width = 0.2) +
  geom_dotplot(aes(x = Stage, y = Entropy, fill = Condition),
               binaxis = 'y', stackdir = "center", binwidth = .025,
               binpositions = 'all', dotsize = 1, alpha = 0.5) +
  scale_fill_manual(values = my.colours) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  ylab("Mutual information, category")
Same plot with a visual representation of non-chance conditioning.
First, need to get info from PHigherIndividualMutualInformationClass into same row as IndividualMutualInformationClass
# Merge the Monte Carlo p-value rows into the same row as the class MI value,
# so the plot below can colour dots by significance.
entropy.data.class.mi <- subset(entropy.data,Measure=="IndividualMutualInformationClass" & NCategories=="Two Categories")
entropy.data.class.mi.p <- subset(entropy.data,Measure=="PHigherIndividualMutualInformationClass" & NCategories=="Two Categories")[,c("Participant","Stage","Entropy")]
# NOTE(review): merge keys are Participant + Stage only - confirm Participant
# IDs are unique across chains, otherwise rows could cross-match.
entropy.data.class.mi.combined <- merge(entropy.data.class.mi,entropy.data.class.mi.p,by=c("Participant","Stage"))
entropy.data.class.mi.combined <- plyr::rename(entropy.data.class.mi.combined,c("Entropy.x"="ClassMI","Entropy.y"="p"))
# Vectorised comparison replaces the original mapply over a scalar lambda
entropy.data.class.mi.combined$Significant <- entropy.data.class.mi.combined$p < .05
Then use Significant to set dotplot line colour.
# Class MI dot plot with outlined dots marking individuals whose conditioning
# is above chance (Significant from the Monte Carlo test); saved to PDF.
# NOTE(review): scale_colour_manual uses the string "NA" as a colour value -
# presumably NA (no outline) was intended; confirm this renders as expected.
# NOTE(review): the errorbar layer supplies fun.data together with
# fun.min/fun.max - confirm which of the two ggplot2 actually honours here.
ggplot(data = entropy.data.class.mi.combined) +
  facet_grid(NCategories ~ Condition) +
  stat_summary(aes(x = Stage, y = ClassMI), geom = 'point',
               fun = 'mean', colour = 'black', fill = 'black', size = 3, shape = 23) +
  stat_summary(aes(x = Stage, y = ClassMI), geom = 'errorbar',
               fun.data = 'mean_cl_boot', fun.min = "min", fun.max = "max", width = 0.2) +
  geom_dotplot(aes(x = Stage, y = ClassMI, fill = Condition, colour = Significant),
               binaxis = 'y', stackdir = "center", binwidth = .025,
               binpositions = 'all', dotsize = 1, alpha = 0.5) +
  scale_colour_manual(values = c("NA", "black")) +
  scale_fill_manual(values = my.colours) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  ylab("Mutual information, category")
ggsave("Figures/class_mi.pdf", width = 8, height = 3)
Stat on class-based conditioning.
# Restrict to the two post-training stages and set contrasts for the mixed
# models below.
entropy.data.for.analysis <- subset(entropy.data,Stage %in% c('Recall','Interaction'))
entropy.data.for.analysis$Stage <- droplevels(factor(entropy.data.for.analysis$Stage))
# Helmert contrasts on Condition (levels: Single, PseudoDyad, Dyad):
# Condition1 = PseudoDyad vs Single; Condition2 = Dyad vs the other two.
contrasts(entropy.data.for.analysis$Condition) <- contr.helmert(3)
contrasts(entropy.data.for.analysis$NCategories) <- -contr.sum(2) #NB - so that Two Categories coded as 1
contrasts(entropy.data.for.analysis$Stage) <- -contr.sum(2) #NB - so that Interaction coded as 1
#not appropriate as reduces to 1 number per Participant per Stage, i.e. linear model required
#class.mi.model <- lmerTest::lmer(Entropy ~ Condition * Stage + (1 + Stage | Chain/Participant),data=subset(entropy.data.for.analysis,Measure=='IndividualMutualInformationClass' & NCategories=="Two Categories"))
# singular
#class.mi.model <- lmerTest::lmer(Entropy ~ Condition * Stage + (1 + Stage | Chain),data=subset(entropy.data.for.analysis,Measure=='IndividualMutualInformationClass' & NCategories=="Two Categories"))
#also singular
#class.mi.model <- lmerTest::lmer(Entropy ~ Condition * Stage + (1 | Chain/Participant),data=subset(entropy.data.for.analysis,Measure=='IndividualMutualInformationClass' & NCategories=="Two Categories"))
# Final model: random intercept for Chain (pair) only - the richer random-effect
# structures tried above were singular or inappropriate.
class.mi.model <- lmerTest::lmer(Entropy ~ Condition * Stage + (1 | Chain),data=subset(entropy.data.for.analysis,Measure=='IndividualMutualInformationClass' & NCategories=="Two Categories"))
No effects of anything! MI is non-zero overall, but barely.
# Print the class-MI model fit (fixed/random effects, Satterthwaite t-tests)
summary(class.mi.model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Entropy ~ Condition * Stage + (1 | Chain)
## Data:
## subset(entropy.data.for.analysis, Measure == "IndividualMutualInformationClass" &
## NCategories == "Two Categories")
##
## REML criterion at convergence: -45.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5470 -0.4118 -0.2306 0.0020 4.5691
##
## Random effects:
## Groups Name Variance Std.Dev.
## Chain (Intercept) 0.005735 0.07573
## Residual 0.026342 0.16230
## Number of obs: 124, groups: Chain, 46
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.100678 0.019155 46.367875 5.256 3.64e-06 ***
## Condition1 -0.013344 0.025160 57.881359 -0.530 0.598
## Condition2 -0.015056 0.012486 34.812858 -1.206 0.236
## Stage1 -0.002068 0.015548 73.633102 -0.133 0.895
## Condition1:Stage1 0.004516 0.021000 73.633102 0.215 0.830
## Condition2:Stage1 0.008115 0.009733 73.633102 0.834 0.407
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cndtn1 Cndtn2 Stage1 Cn1:S1
## Condition1 0.058
## Condition2 -0.230 -0.045
## Stage1 0.000 0.000 0.000
## Cndtn1:Stg1 0.000 0.000 0.000 0.060
## Cndtn2:Stg1 0.000 0.000 0.000 -0.345 -0.048
Is there lexical conditioning though? Plot lexical MI to see.
# Dot plot of per-individual lexical MI, all conditions.
# NOTE(review): the errorbar layer supplies fun.data together with
# fun.min/fun.max - confirm which of the two ggplot2 actually honours here.
ggplot(data = subset(entropy.data, Measure == "IndividualMutualInformationLexical")) +
  facet_grid(NCategories ~ Condition) +
  stat_summary(aes(x = Stage, y = Entropy), geom = 'point',
               fun = 'mean', colour = 'black', fill = 'black', size = 3, shape = 23) +
  stat_summary(aes(x = Stage, y = Entropy), geom = 'errorbar',
               fun.data = 'mean_cl_boot', fun.min = "min", fun.max = "max", width = 0.2) +
  geom_dotplot(aes(x = Stage, y = Entropy, fill = Condition),
               binaxis = 'y', stackdir = "center", binwidth = .025,
               binpositions = 'all', dotsize = 1, alpha = 0.5) +
  scale_fill_manual(values = my.colours) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  ylab("Mutual information, lexical")
Same idea as above, adding info on p-value from mc test.
#first, need to get info from PHigherIndividualMutualInformationLexical into same row as IndividualMutualInformationLexical
entropy.data.lexical.mi <- subset(entropy.data,Measure=="IndividualMutualInformationLexical")
entropy.data.lexical.mi.p <- subset(entropy.data,Measure=="PHigherIndividualMutualInformationLexical")[,c("Participant","Stage","Entropy")]
# NOTE(review): merge keys are Participant + Stage only - confirm Participant
# IDs are unique across chains, otherwise rows could cross-match.
entropy.data.lexical.mi.combined <- merge(entropy.data.lexical.mi,entropy.data.lexical.mi.p,by=c("Participant","Stage"))
entropy.data.lexical.mi.combined <- plyr::rename(entropy.data.lexical.mi.combined,c("Entropy.x"="LexicalMI","Entropy.y"="p"))
# Vectorised comparison replaces the original mapply over a scalar lambda
entropy.data.lexical.mi.combined$Significant <- entropy.data.lexical.mi.combined$p < .05
# Lexical MI dot plot with outlined dots marking individuals whose conditioning
# is above chance (Significant from the Monte Carlo test); saved to PDF.
# NOTE(review): scale_colour_manual uses the string "NA" as a colour value -
# presumably NA (no outline) was intended; confirm this renders as expected.
# NOTE(review): the errorbar layer supplies fun.data together with
# fun.min/fun.max - confirm which of the two ggplot2 actually honours here.
ggplot(data = entropy.data.lexical.mi.combined) +
  facet_grid(NCategories ~ Condition) +
  stat_summary(aes(x = Stage, y = LexicalMI), geom = 'point',
               fun = 'mean', colour = 'black', fill = 'black', size = 3, shape = 23) +
  stat_summary(aes(x = Stage, y = LexicalMI), geom = 'errorbar',
               fun.data = 'mean_cl_boot', fun.min = "min", fun.max = "max", width = 0.2) +
  geom_dotplot(aes(x = Stage, y = LexicalMI, fill = Condition, colour = Significant),
               binaxis = 'y', stackdir = "center", binwidth = .025,
               binpositions = 'all', dotsize = 1, alpha = 0.5) +
  scale_colour_manual(values = c("NA", "black")) +
  scale_fill_manual(values = my.colours) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  ylab("Mutual information, lexical")
ggsave("Figures/lexical_mi.pdf", width = 8, height = 6)
Lexical MI model.
#same logic as above - 1 + Stage in random effects will not work
# Lexical-MI model: full Condition x Stage x NCategories factorial, random
# intercepts for participants nested within chains (pairs).
lexical.mi.model <- lmerTest::lmer(Entropy ~ Condition * Stage * NCategories + (1 | Chain/Participant), data=subset(entropy.data.for.analysis,Measure=='IndividualMutualInformationLexical'))
Model shows:
intercept is significant (lexical MI is clearly non-zero)
stage (lexical conditioning declines in interaction)
condition1 x stage (singles and pseudodyads differ in what happens in interaction - pseudodyads don’t show the reduction in lexical conditioning)
condition2 x stage x n categories (2-category dyads don’t show the drop in lexical conditioning in interaction seen elsewhere)
# Print the lexical-MI model fit (fixed/random effects, Satterthwaite t-tests)
summary(lexical.mi.model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Entropy ~ Condition * Stage * NCategories + (1 | Chain/Participant)
## Data:
## subset(entropy.data.for.analysis, Measure == "IndividualMutualInformationLexical")
##
## REML criterion at convergence: 15.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.04419 -0.53397 -0.06307 0.52827 2.70203
##
## Random effects:
## Groups Name Variance Std.Dev.
## Participant:Chain (Intercept) 0.02717 0.16483
## Chain (Intercept) 0.00262 0.05119
## Residual 0.02599 0.16120
## Number of obs: 246, groups: Participant:Chain, 123; Chain, 91
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 0.417359 0.020092 109.296679 20.773
## Condition1 0.019381 0.027035 116.627749 0.717
## Condition2 -0.011793 0.012651 87.270641 -0.932
## Stage1 -0.023254 0.011010 117.000134 -2.112
## NCategories1 -0.003702 0.020092 109.296679 -0.184
## Condition1:Stage1 0.033271 0.014899 117.000134 2.233
## Condition2:Stage1 0.001547 0.006872 117.000134 0.225
## Condition1:NCategories1 -0.041682 0.027035 116.627749 -1.542
## Condition2:NCategories1 0.015052 0.012651 87.270641 1.190
## Stage1:NCategories1 -0.001762 0.011010 117.000134 -0.160
## Condition1:Stage1:NCategories1 -0.009840 0.014899 117.000134 -0.660
## Condition2:Stage1:NCategories1 0.021074 0.006872 117.000134 3.067
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Condition1 0.47488
## Condition2 0.35380
## Stage1 0.03680 *
## NCategories1 0.85416
## Condition1:Stage1 0.02745 *
## Condition2:Stage1 0.82225
## Condition1:NCategories1 0.12585
## Condition2:NCategories1 0.23733
## Stage1:NCategories1 0.87309
## Condition1:Stage1:NCategories1 0.51028
## Condition2:Stage1:NCategories1 0.00269 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cndtn1 Cndtn2 Stage1 NCtgr1 Cn1:S1 Cn2:S1 C1:NC1 C2:NC1
## Condition1 0.077
## Condition2 -0.329 -0.061
## Stage1 0.000 0.000 0.000
## NCategoris1 -0.016 -0.018 0.013 0.000
## Cndtn1:Stg1 0.000 0.000 0.000 0.077 0.000
## Cndtn2:Stg1 0.000 0.000 0.000 -0.354 0.000 -0.062
## Cndtn1:NCt1 -0.018 -0.020 0.014 0.000 0.077 0.000 0.000
## Cndtn2:NCt1 0.013 0.014 -0.010 0.000 -0.329 0.000 0.000 -0.061
## Stg1:NCtgr1 0.000 0.000 0.000 -0.016 0.000 -0.018 0.013 0.000 0.000
## Cnd1:S1:NC1 0.000 0.000 0.000 -0.018 0.000 -0.020 0.015 0.000 0.000
## Cnd2:S1:NC1 0.000 0.000 0.000 0.013 0.000 0.015 -0.010 0.000 0.000
## S1:NC1 C1:S1:
## Condition1
## Condition2
## Stage1
## NCategoris1
## Cndtn1:Stg1
## Cndtn2:Stg1
## Cndtn1:NCt1
## Cndtn2:NCt1
## Stg1:NCtgr1
## Cnd1:S1:NC1 0.077
## Cnd2:S1:NC1 -0.354 -0.062
However, that analysis includes several one-category pairs who have 0 variation - what do the plots and stats look like if we exclude those? That will allow us to see what pairs exhibiting variable behaviour during interaction look like.
Can identify fully-regular pairs using convergence.data (calculated earlier).
# Individuals whose majority-marker proportion is exactly 1 (full regularisers)
# or above .95 (highly regular), used to exclude them from the analyses below.
# NOTE(review): the prose above says these come from convergence.data, but the
# code reads proportion.data (defined earlier, outside this view) - confirm
# the intended source.
fully.regular.individuals <- subset(proportion.data,MajorityParticleUse==1)$ParticipantID
highly.regular.individuals <- subset(proportion.data,MajorityParticleUse>.95)$ParticipantID
Plot and stat without full regularisers.
# Lexical MI dot plot excluding participants who fully regularised.
# NOTE(review): the errorbar layer supplies fun.data together with
# fun.min/fun.max - confirm which of the two ggplot2 actually honours here.
ggplot(data = subset(entropy.data, Measure == "IndividualMutualInformationLexical" & !(Participant %in% fully.regular.individuals))) +
  facet_grid(NCategories ~ Condition) +
  stat_summary(aes(x = Stage, y = Entropy), geom = 'point',
               fun = 'mean', colour = 'black', fill = 'black', size = 3, shape = 23) +
  stat_summary(aes(x = Stage, y = Entropy), geom = 'errorbar',
               fun.data = 'mean_cl_boot', fun.min = "min", fun.max = "max", width = 0.2) +
  geom_dotplot(aes(x = Stage, y = Entropy, fill = Condition),
               binaxis = 'y', stackdir = "center", binwidth = .025,
               binpositions = 'all', dotsize = 1, alpha = 0.5) +
  scale_fill_manual(values = my.colours) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  ylab("Mutual information, lexical")
The model without the full regularisers still shows the 3-way interaction.
# Refit the lexical-MI model excluding participants who regularised fully
# (majority-marker proportion == 1); same fixed and random structure.
lexical.mi.model.notfull <- lmerTest::lmer(Entropy ~ Condition * Stage * NCategories + (1 | Chain/Participant),
data=subset(entropy.data.for.analysis,Measure=='IndividualMutualInformationLexical' & !(Participant %in% fully.regular.individuals)))
## boundary (singular) fit: see help('isSingular')
# Print the refit excluding full regularisers
summary(lexical.mi.model.notfull)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Entropy ~ Condition * Stage * NCategories + (1 | Chain/Participant)
## Data:
## subset(entropy.data.for.analysis, Measure == "IndividualMutualInformationLexical" &
## !(Participant %in% fully.regular.individuals))
##
## REML criterion at convergence: 4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0957 -0.5511 -0.1042 0.5809 2.6058
##
## Random effects:
## Groups Name Variance Std.Dev.
## Participant:Chain (Intercept) 0.02730 0.1652
## Chain (Intercept) 0.00000 0.0000
## Residual 0.02441 0.1562
## Number of obs: 226, groups: Participant:Chain, 113; Chain, 88
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 4.334e-01 1.957e-02 1.070e+02 22.144
## Condition1 1.938e-02 2.598e-02 1.070e+02 0.746
## Condition2 4.222e-03 1.257e-02 1.070e+02 0.336
## Stage1 -2.037e-02 1.088e-02 1.070e+02 -1.872
## NCategories1 -1.867e-02 1.957e-02 1.070e+02 -0.954
## Condition1:Stage1 3.327e-02 1.444e-02 1.070e+02 2.304
## Condition2:Stage1 4.437e-03 6.988e-03 1.070e+02 0.635
## Condition1:NCategories1 -4.168e-02 2.598e-02 1.070e+02 -1.605
## Condition2:NCategories1 8.157e-05 1.257e-02 1.070e+02 0.006
## Stage1:NCategories1 -7.909e-03 1.088e-02 1.070e+02 -0.727
## Condition1:Stage1:NCategories1 -9.840e-03 1.444e-02 1.070e+02 -0.681
## Condition2:Stage1:NCategories1 1.493e-02 6.988e-03 1.070e+02 2.136
## Pr(>|t|)
## (Intercept) <2e-16 ***
## Condition1 0.4573
## Condition2 0.7377
## Stage1 0.0639 .
## NCategories1 0.3422
## Condition1:Stage1 0.0231 *
## Condition2:Stage1 0.5269
## Condition1:NCategories1 0.1115
## Condition2:NCategories1 0.9948
## Stage1:NCategories1 0.4688
## Condition1:Stage1:NCategories1 0.4970
## Condition2:Stage1:NCategories1 0.0350 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cndtn1 Cndtn2 Stage1 NCtgr1 Cn1:S1 Cn2:S1 C1:NC1 C2:NC1
## Condition1 0.076
## Condition2 -0.272 -0.059
## Stage1 0.000 0.000 0.000
## NCategoris1 -0.048 -0.018 -0.038 0.000
## Cndtn1:Stg1 0.000 0.000 0.000 0.076 0.000
## Cndtn2:Stg1 0.000 0.000 0.000 -0.272 0.000 -0.059
## Cndtn1:NCt1 -0.018 -0.020 0.014 0.000 0.076 0.000 0.000
## Cndtn2:NCt1 -0.038 0.014 -0.087 0.000 -0.272 0.000 0.000 -0.059
## Stg1:NCtgr1 0.000 0.000 0.000 -0.048 0.000 -0.018 -0.038 0.000 0.000
## Cnd1:S1:NC1 0.000 0.000 0.000 -0.018 0.000 -0.020 0.014 0.000 0.000
## Cnd2:S1:NC1 0.000 0.000 0.000 -0.038 0.000 0.014 -0.087 0.000 0.000
## S1:NC1 C1:S1:
## Condition1
## Condition2
## Stage1
## NCategoris1
## Cndtn1:Stg1
## Cndtn2:Stg1
## Cndtn1:NCt1
## Cndtn2:NCt1
## Stg1:NCtgr1
## Cnd1:S1:NC1 0.076
## Cnd2:S1:NC1 -0.272 -0.059
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Plot and stat without highly regulars.
# Lexical MI dot plot excluding highly regular participants (proportion > .95).
# NOTE(review): the errorbar layer supplies fun.data together with
# fun.min/fun.max - confirm which of the two ggplot2 actually honours here.
ggplot(data = subset(entropy.data, Measure == "IndividualMutualInformationLexical" & !(Participant %in% highly.regular.individuals))) +
  facet_grid(NCategories ~ Condition) +
  stat_summary(aes(x = Stage, y = Entropy), geom = 'point',
               fun = 'mean', colour = 'black', fill = 'black', size = 3, shape = 23) +
  stat_summary(aes(x = Stage, y = Entropy), geom = 'errorbar',
               fun.data = 'mean_cl_boot', fun.min = "min", fun.max = "max", width = 0.2) +
  geom_dotplot(aes(x = Stage, y = Entropy, fill = Condition),
               binaxis = 'y', stackdir = "center", binwidth = .025,
               binpositions = 'all', dotsize = 1, alpha = 0.5) +
  scale_fill_manual(values = my.colours) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  ylab("Mutual information, lexical")
The model without the highly regular individuals still shows the 3-way interaction.
# Refit the lexical-MI model excluding highly regular participants
# (majority-marker proportion > .95); same fixed and random structure.
lexical.mi.model.nothigh <- lmerTest::lmer(Entropy ~ Condition * Stage * NCategories + (1 | Chain/Participant),
data=subset(entropy.data.for.analysis,Measure=='IndividualMutualInformationLexical' & !(Participant %in% highly.regular.individuals)))
## boundary (singular) fit: see help('isSingular')
summary(lexical.mi.model.nothigh)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Entropy ~ Condition * Stage * NCategories + (1 | Chain/Participant)
## Data:
## subset(entropy.data.for.analysis, Measure == "IndividualMutualInformationLexical" &
## !(Participant %in% highly.regular.individuals))
##
## REML criterion at convergence: 3.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.06634 -0.55198 -0.09018 0.55321 2.60403
##
## Random effects:
## Groups Name Variance Std.Dev.
## Participant:Chain (Intercept) 0.02536 0.1592
## Chain (Intercept) 0.00000 0.0000
## Residual 0.02473 0.1573
## Number of obs: 216, groups: Participant:Chain, 108; Chain, 86
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 4.423e-01 1.933e-02 1.020e+02 22.877
## Condition1 1.938e-02 2.539e-02 1.020e+02 0.763
## Condition2 1.314e-02 1.261e-02 1.020e+02 1.042
## Stage1 -1.917e-02 1.107e-02 1.020e+02 -1.732
## NCategories1 -1.945e-02 1.933e-02 1.020e+02 -1.006
## Condition1:Stage1 3.327e-02 1.454e-02 1.020e+02 2.289
## Condition2:Stage1 5.632e-03 7.218e-03 1.020e+02 0.780
## Condition1:NCategories1 -4.168e-02 2.539e-02 1.020e+02 -1.642
## Condition2:NCategories1 -7.004e-04 1.261e-02 1.020e+02 -0.056
## Stage1:NCategories1 -7.434e-03 1.107e-02 1.020e+02 -0.672
## Condition1:Stage1:NCategories1 -9.840e-03 1.454e-02 1.020e+02 -0.677
## Condition2:Stage1:NCategories1 1.540e-02 7.218e-03 1.020e+02 2.134
## Pr(>|t|)
## (Intercept) <2e-16 ***
## Condition1 0.4470
## Condition2 0.2999
## Stage1 0.0863 .
## NCategories1 0.3167
## Condition1:Stage1 0.0241 *
## Condition2:Stage1 0.4370
## Condition1:NCategories1 0.1037
## Condition2:NCategories1 0.9558
## Stage1:NCategories1 0.5034
## Condition1:Stage1:NCategories1 0.5000
## Condition2:Stage1:NCategories1 0.0353 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cndtn1 Cndtn2 Stage1 NCtgr1 Cn1:S1 Cn2:S1 C1:NC1 C2:NC1
## Condition1 0.075
## Condition2 -0.229 -0.057
## Stage1 0.000 0.000 0.000
## NCategoris1 -0.049 -0.018 -0.039 0.000
## Cndtn1:Stg1 0.000 0.000 0.000 0.075 0.000
## Cndtn2:Stg1 0.000 0.000 0.000 -0.229 0.000 -0.057
## Cndtn1:NCt1 -0.018 -0.020 0.013 0.000 0.075 0.000 0.000
## Cndtn2:NCt1 -0.039 0.013 -0.088 0.000 -0.229 0.000 0.000 -0.057
## Stg1:NCtgr1 0.000 0.000 0.000 -0.049 0.000 -0.018 -0.039 0.000 0.000
## Cnd1:S1:NC1 0.000 0.000 0.000 -0.018 0.000 -0.020 0.013 0.000 0.000
## Cnd2:S1:NC1 0.000 0.000 0.000 -0.039 0.000 0.013 -0.088 0.000 0.000
## S1:NC1 C1:S1:
## Condition1
## Condition2
## Stage1
## NCategoris1
## Cndtn1:Stg1
## Cndtn2:Stg1
## Cndtn1:NCt1
## Cndtn2:NCt1
## Stg1:NCtgr1
## Cnd1:S1:NC1 0.075
## Cnd2:S1:NC1 -0.229 -0.057
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Plot of class-based versus lexical MI, with line colour indicating significance of the two separate tests (not in paper).
Combine data sets.
# Merge the lexical-MI and class-MI data sets on the identifying columns;
# after the merge, Significant.x comes from the lexical data (first argument)
# and Significant.y from the class data.
entropy.data.both.mi.combined <- merge(entropy.data.lexical.mi.combined,entropy.data.class.mi.combined,by=c("Participant","Stage","Condition","NCategories","Chain","Generation"))
# Combine the two per-test significance flags ("lexical class") into one label
entropy.data.both.mi.combined$SignificantCombined <- paste(entropy.data.both.mi.combined$Significant.x,entropy.data.both.mi.combined$Significant.y)
entropy.data.both.mi.combined$SignificantCombined <- plyr::revalue(entropy.data.both.mi.combined$SignificantCombined,
c("FALSE FALSE"="Neither significant",
"TRUE FALSE"="Significant lexical conditioning only","FALSE TRUE"="Significant class conditioning only","TRUE TRUE"="Both significant"))
# Scatter of class MI vs lexical MI (two-category data only), fill coded by
# which of the two tests was significant.
# NOTE(review): the four fill values map to SignificantCombined's levels in
# alphabetical order (Both / Neither / class only / lexical only) - presumably
# red / NA / grey / black respectively; verify against the legend.
ggplot(data=subset(entropy.data.both.mi.combined,NCategories=="Two Categories")) +
facet_grid(Stage~Condition) +
geom_point(aes(x=ClassMI, y=LexicalMI, fill=SignificantCombined),shape=21) +
theme_bw() +
scale_fill_manual(values=c("red",NA,"grey","black")) +
#theme(legend.position = "none") +
#theme(axis.title.x = element_blank()) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
The participants where both measures are significant are Dyad 3201 (Recall), Pseudo 2141 (Interaction), Dyad 3001 and 3222 (Interaction).
In addition to calculating mutual information based on each individual’s productions, we also calculate MI based on pairs (i.e. looking for conditioning in their collective behaviour). Alignment of conditioning in pairs in a dyad can therefore be evaluated by looking at pair-based measures of conditioning, and in particular whether those increase as we move from pre-interaction recall (where they can only be aligned by chance) to interaction.
The pair-based measures clearly do not increase in interaction.
#for pair-based measures we can aggregate across the members of the pair since the two individuals have the same values on those measures - otherwise we end up plotting 2 points per pair.
entropy.data.pairbased <- aggregate(Entropy~Chain+Condition+Stage+NCategories+Measure,data=entropy.data,FUN=mean)
# Pair-based class MI for two-category dyads, Recall vs Interaction.
# Same diamond-mean / bootstrap-CI / dotplot layering as the earlier plots.
ggplot(data=subset(entropy.data.pairbased,Condition=='Dyad' & Measure=="PairMutualInformationClass" & NCategories=="Two Categories")) +
facet_grid(NCategories~Condition) +
stat_summary(aes(x=Stage, y=Entropy),geom='point', fun='mean', colour='black',fill='black',size=3, shape=23) +
stat_summary(aes(x=Stage, y=Entropy),geom='errorbar', fun.data='mean_cl_boot',fun.min="min", fun.max="max",width=0.2) +
geom_dotplot(aes(x=Stage, y=Entropy, fill=Condition),binaxis='y',stackdir="center", binwidth = .025, binpositions='all', dotsize=0.5, alpha=0.5) +
theme_bw() +
scale_fill_manual(values=my.colours[3]) +
theme(legend.position = "none") +
theme(axis.title.x = element_blank()) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
ylab("Mutual information, category, pair-based")
# Does pair-based class MI change from Recall to Interaction (two-category
# dyads only)? Random intercept per Chain (= pair).
class.mi.model.pair <- lmerTest::lmer(Entropy ~ Stage + (1 | Chain),
data=subset(entropy.data.pairbased,Measure=='PairMutualInformationClass' & NCategories=="Two Categories" & Condition=="Dyad"))
summary(class.mi.model.pair)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Entropy ~ Stage + (1 | Chain)
## Data:
## subset(entropy.data.pairbased, Measure == "PairMutualInformationClass" &
## NCategories == "Two Categories" & Condition == "Dyad")
##
## REML criterion at convergence: -97.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5721 -0.4226 -0.1606 0.4316 1.8323
##
## Random effects:
## Groups Name Variance Std.Dev.
## Chain (Intercept) 0.001607 0.04009
## Residual 0.000889 0.02982
## Number of obs: 32, groups: Chain, 16
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.405e-02 1.249e-02 2.121e+01 2.726 0.0126 *
## StageInteraction 6.962e-04 1.054e-02 1.500e+01 0.066 0.9482
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## StagIntrctn -0.422
Now pair-based lexical MI.
# Pair-based lexical MI for dyads, both NCategories conditions.
ggplot(data=subset(entropy.data.pairbased,Condition=='Dyad' & Measure=="PairMutualInformationLexical")) +
facet_grid(NCategories~Condition) +
stat_summary(aes(x=Stage, y=Entropy),geom='point', fun='mean', colour='black',fill='black',size=3, shape=23) +
stat_summary(aes(x=Stage, y=Entropy),geom='errorbar', fun.data='mean_cl_boot',fun.min="min", fun.max="max",width=0.2) +
geom_dotplot(aes(x=Stage, y=Entropy, fill=Condition),binaxis='y',stackdir="center", binwidth = .025, binpositions='all', alpha=0.5) +
theme_bw() +
scale_fill_manual(values=my.colours[3]) +
theme(legend.position = "none") +
theme(axis.title.x = element_blank()) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
ylab("Mutual information, lexical, pair-based")
This model (where everything is treatment coded) shows no effect of Stage or NCategories but a Stage*NCategories interaction that shows that lexical (!) MI increased in interaction in Two-Category dyads but not one-category dyads.
# Pair-based lexical MI: Stage x NCategories (treatment coded here, unlike
# the individual-level models), random intercept per Chain.
lexical.mi.model.pair <- lmerTest::lmer(Entropy ~ Stage * NCategories + (1 | Chain),
data=subset(entropy.data.pairbased,Measure=='PairMutualInformationLexical' & Condition=="Dyad"))
summary(lexical.mi.model.pair)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Entropy ~ Stage * NCategories + (1 | Chain)
## Data:
## subset(entropy.data.pairbased, Measure == "PairMutualInformationLexical" &
## Condition == "Dyad")
##
## REML criterion at convergence: -64.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.48046 -0.57500 0.00044 0.43645 2.35254
##
## Random effects:
## Groups Name Variance Std.Dev.
## Chain (Intercept) 0.01084 0.10412
## Residual 0.00907 0.09524
## Number of obs: 64, groups: Chain, 32
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 0.239298 0.035276 46.281322
## StageInteraction -0.002735 0.033672 30.000000
## NCategoriesTwo Categories -0.040466 0.049888 46.281322
## StageInteraction:NCategoriesTwo Categories 0.112334 0.047620 30.000000
## t value Pr(>|t|)
## (Intercept) 6.784 1.87e-08 ***
## StageInteraction -0.081 0.936
## NCategoriesTwo Categories -0.811 0.421
## StageInteraction:NCategoriesTwo Categories 2.359 0.025 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) StgInt NCtgTC
## StagIntrctn -0.477
## NCtgrsTwCtg -0.707 0.337
## StgInt:NCTC 0.337 -0.707 -0.477
However, that analysis includes several one-category pairs who have 0 variation - what do the plots look like if we exclude those? That will allow us to see what pairs exhibiting variable behaviour during interaction look like.
Can identify fully-regular pairs using convergence.data (calculated earlier).
# Per-pair mean majority-particle use across Recall and Interaction, then
# pull out the chains that are fully regular (proportion exactly 1) or highly
# regular (> .95) in at least one of those stages.
pair.maj.proportion.data.for.pair <- aggregate(MajorityParticleUse~Chain+Condition+NCategories+Stage,data=subset(convergence.data, (Stage == "Recall" | Stage == "Interaction")),FUN=mean)
fully.regular.pairs <- subset(pair.maj.proportion.data.for.pair,MajorityParticleUse==1)$Chain
highly.regular.pairs <- subset(pair.maj.proportion.data.for.pair,MajorityParticleUse>.95)$Chain
Plot and stat excluding fully regular pairs - the interaction is now highly marginal (p=.09)
# Pair-based lexical MI, excluding fully regular pairs.
ggplot(data=subset(entropy.data.pairbased,Condition=='Dyad' & Measure=="PairMutualInformationLexical" & !(Chain%in%fully.regular.pairs))) +
facet_grid(NCategories~.) +
stat_summary(aes(x=Stage, y=Entropy),geom='point', fun='mean', colour='black',fill='black',size=3, shape=23) +
stat_summary(aes(x=Stage, y=Entropy),geom='errorbar', fun.data='mean_cl_boot',fun.min="min", fun.max="max",width=0.2) +
geom_dotplot(aes(x=Stage, y=Entropy, fill=Condition),binaxis='y',stackdir="center", binwidth = .025, binpositions='all', alpha=0.5) +
theme_bw() +
scale_fill_manual(values=my.colours[3]) +
theme(legend.position = "none") +
theme(axis.title.x = element_blank()) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
ylab("Mutual information, lexical, pair-based")
# Same pair-based lexical MI model, refit without fully regular pairs.
lexical.mi.model.pair.notfull <- lmerTest::lmer(Entropy ~ Stage * NCategories + (1 | Chain),
data=subset(entropy.data.pairbased,Measure=='PairMutualInformationLexical' & Condition=="Dyad" & !(Chain%in%fully.regular.pairs)))
summary(lexical.mi.model.pair.notfull)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Entropy ~ Stage * NCategories + (1 | Chain)
## Data:
## subset(entropy.data.pairbased, Measure == "PairMutualInformationLexical" &
## Condition == "Dyad" & !(Chain %in% fully.regular.pairs))
##
## REML criterion at convergence: -62.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.38818 -0.58400 -0.02128 0.48370 2.15141
##
## Random effects:
## Groups Name Variance Std.Dev.
## Chain (Intercept) 0.007754 0.08806
## Residual 0.009285 0.09636
## Number of obs: 58, groups: Chain, 29
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 0.26990 0.03620 44.73525 7.455
## StageInteraction 0.02125 0.03779 27.00000 0.562
## NCategoriesTwo Categories -0.07107 0.04874 44.73525 -1.458
## StageInteraction:NCategoriesTwo Categories 0.08835 0.05088 27.00000 1.736
## Pr(>|t|)
## (Intercept) 2.24e-09 ***
## StageInteraction 0.5786
## NCategoriesTwo Categories 0.1518
## StageInteraction:NCategoriesTwo Categories 0.0939 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) StgInt NCtgTC
## StagIntrctn -0.522
## NCtgrsTwCtg -0.743 0.388
## StgInt:NCTC 0.388 -0.743 -0.522
Plot and stat excluding highly regular pairs (not just fully regular pairs) - the interaction goes.
# Pair-based lexical MI, excluding highly regular pairs (stricter exclusion
# than fully regular).
ggplot(data=subset(entropy.data.pairbased,Condition=='Dyad' & Measure=="PairMutualInformationLexical" & !(Chain%in%highly.regular.pairs))) +
facet_grid(NCategories~Condition) +
stat_summary(aes(x=Stage, y=Entropy),geom='point', fun='mean', colour='black',fill='black',size=3, shape=23) +
stat_summary(aes(x=Stage, y=Entropy),geom='errorbar', fun.data='mean_cl_boot',fun.min="min", fun.max="max",width=0.2) +
geom_dotplot(aes(x=Stage, y=Entropy, fill=Condition),binaxis='y',stackdir="center", binwidth = .025, binpositions='all', alpha=0.5) +
theme_bw() +
scale_fill_manual(values=my.colours[3]) +
theme(legend.position = "none") +
theme(axis.title.x = element_blank()) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
ylab("Mutual information, lexical, pair-based")
# Same pair-based lexical MI model, refit without highly regular pairs.
lexical.mi.model.pair.nothigh <- lmerTest::lmer(Entropy ~ Stage * NCategories + (1 | Chain),
data=subset(entropy.data.pairbased,Measure=='PairMutualInformationLexical' & Condition=="Dyad" & !(Chain%in%highly.regular.pairs)))
summary(lexical.mi.model.pair.nothigh)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Entropy ~ Stage * NCategories + (1 | Chain)
## Data:
## subset(entropy.data.pairbased, Measure == "PairMutualInformationLexical" &
## Condition == "Dyad" & !(Chain %in% highly.regular.pairs))
##
## REML criterion at convergence: -62.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.47931 -0.59698 -0.00972 0.49628 1.99214
##
## Random effects:
## Groups Name Variance Std.Dev.
## Chain (Intercept) 0.004986 0.07061
## Residual 0.009640 0.09818
## Number of obs: 54, groups: Chain, 27
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 0.29926 0.03646 44.79479 8.207
## StageInteraction 0.03302 0.04187 25.00000 0.789
## NCategoriesTwo Categories -0.10043 0.04737 44.79479 -2.120
## StageInteraction:NCategoriesTwo Categories 0.07658 0.05438 25.00000 1.408
## Pr(>|t|)
## (Intercept) 1.79e-10 ***
## StageInteraction 0.4377
## NCategoriesTwo Categories 0.0396 *
## StageInteraction:NCategoriesTwo Categories 0.1714
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) StgInt NCtgTC
## StagIntrctn -0.574
## NCtgrsTwCtg -0.770 0.442
## StgInt:NCTC 0.442 -0.770 -0.574
Do we see priming of particle choice? And does across- vs within-category priming differ in the 2-category conditions?
# Restrict to interaction-stage director trials for the priming analysis.
all.data.for.priming.analysis <- subset(all.data.including.ungrammatical, Stage == "interactD")
# Joint trial numbering (TrialNumber is per participant): number each pair's
# trials 1..n in row order.  ave() numbers within Chain directly, which is
# robust to unequal trial counts per chain and to unused factor levels left in
# Chain after subsetting; the previous
# rep(seq(1, nrow/nlevels), nlevels) construction assumed equal-sized,
# contiguous blocks of rows per chain and could silently mis-number otherwise.
all.data.for.priming.analysis$TrialNumberPair <- ave(
  seq_len(nrow(all.data.for.priming.analysis)),
  all.data.for.priming.analysis$Chain,
  FUN = seq_along
)
For standard priming analysis, want to have partner’s previous particle as a predictor - since each pair is assigned a random pair of particles, need to recode for comparability across pairs. Recoding to “NoParticle”, “Particle1” and “Particle2”.
# Recode each pair's two training particles to the arbitrary labels
# "Particle1"/"Particle2" (the assignment is randomised per pair via
# sample()), with bare plurals coded "NoParticle".  Per-pair chunks are
# collected in a list and bound once at the end rather than growing a data
# frame with rbind() inside the loop.
recoded.chunks <- list()
for (p in levels(all.data.for.priming.analysis$Chain)) {
  this.pair.data <- subset(all.data.for.priming.analysis, Chain == p)
  # The pair's two particles, as seen in their plural (Number==2) training trials
  this.pair.training.particles <- as.character(unique(subset(all.data, Chain == p & Stage == "training" & Number==2)$TrainingParticle))
  this.pair.training.particles.shuffled <- sample(this.pair.training.particles)
  # ifelse() is vectorised, so the old mapply() wrapper (whose Number argument
  # was never used) is unnecessary.
  this.pair.data$ParticleRecoded <-
    ifelse(this.pair.data$Particle == 0, "NoParticle",
           ifelse(this.pair.data$Particle == this.pair.training.particles.shuffled[1], "Particle1",
                  ifelse(this.pair.data$Particle == this.pair.training.particles.shuffled[2], "Particle2", NA)))
  recoded.chunks[[p]] <- this.pair.data
}
all.data.for.priming.analysis.recoded <- do.call(rbind, recoded.chunks)
calc.partner.previous.particle calculates the last
particle produced by the partner - NB we can’t just work this out by
offsetting the ParticleRecoded column by one, because we want to skip
NoParticle trials.
# Returns the recoded particle from the partner's most recent plural
# (Number == 2) trial before this.trial.number within this.pair, or NA if the
# partner has not yet produced a plural.  "Most recent" follows row order,
# matching the original subset()/tail() behaviour.
calc.partner.previous.particle <- function(data,
                                           this.trial.number,
                                           this.pair, this.director) {
  # which() drops NA comparisons, mirroring subset()'s NA handling
  partner.rows <- which(data$Chain == this.pair &
                          data$ParticipantID != this.director &
                          data$Number == 2 &
                          data$TrialNumberPair < this.trial.number)
  if (length(partner.rows) == 0) {
    # partner hasn't produced a plural yet
    return(NA)
  }
  data$ParticleRecoded[partner.rows[length(partner.rows)]]
}
# Row-wise: attach the partner's most recent particle (before each trial) as
# a predictor column.
all.data.for.priming.analysis.recoded$PartnerLastParticle <-
mapply(function(trialn,pair,director) calc.partner.previous.particle(all.data.for.priming.analysis.recoded,trialn,pair,director),
all.data.for.priming.analysis.recoded$TrialNumberPair,
all.data.for.priming.analysis.recoded$Chain,
all.data.for.priming.analysis.recoded$ParticipantID)
For the purposes of comparing within- vs between-category primes, it will also be useful to note the category of the referent associated with PartnerLastParticle. And we can do the same based on whether it’s the same or a different noun. We also want to know whether the partner’s last production was grammatical or not.
# Returns the referent category ("V" or "A", as character) of the partner's
# most recent plural (Number == 2) trial before this.trial.number, or NA if
# the partner hasn't produced a plural yet (or its category is neither
# "V" nor "A").
calc.partner.previous.class <- function(data,
                                        this.trial.number,
                                        this.pair, this.director) {
  partner.rows <- which(data$Chain == this.pair &
                          data$ParticipantID != this.director &
                          data$Number == 2 &
                          data$TrialNumberPair < this.trial.number)
  if (length(partner.rows) == 0) {
    # partner hasn't produced a plural yet
    return(NA)
  }
  # as.character() so a factor Category can't leak its levels through
  last.category <- as.character(data$Category[partner.rows[length(partner.rows)]])
  if (isTRUE(last.category == "V")) {
    "V"
  } else if (isTRUE(last.category == "A")) {
    "A"
  } else {
    NA
  }
}
# Row-wise: category of the referent associated with the partner's last
# particle (for within- vs across-category prime comparisons).
all.data.for.priming.analysis.recoded$PartnerLastParticleClass <-
mapply(function(trialn,pair,director) calc.partner.previous.class(all.data.for.priming.analysis.recoded,trialn,pair,director),
all.data.for.priming.analysis.recoded$TrialNumberPair,
all.data.for.priming.analysis.recoded$Chain,
all.data.for.priming.analysis.recoded$ParticipantID)
# Returns the noun (as character) from the partner's most recent plural
# (Number == 2) trial before this.trial.number, or NA if the partner hasn't
# produced a plural yet.
calc.partner.previous.noun <- function(data,
                                       this.trial.number,
                                       this.pair, this.director) {
  partner.rows <- which(data$Chain == this.pair &
                          data$ParticipantID != this.director &
                          data$Number == 2 &
                          data$TrialNumberPair < this.trial.number)
  if (length(partner.rows) == 0) {
    # partner hasn't produced a plural yet
    return(NA)
  }
  as.character(data$Noun[partner.rows[length(partner.rows)]])
}
# Row-wise: noun used on the partner's last plural (for lexical-boost
# comparisons).
all.data.for.priming.analysis.recoded$PartnerLastParticleNoun <-
mapply(function(trialn,pair,director) calc.partner.previous.noun(all.data.for.priming.analysis.recoded,trialn,pair,director),
all.data.for.priming.analysis.recoded$TrialNumberPair,
all.data.for.priming.analysis.recoded$Chain,
all.data.for.priming.analysis.recoded$ParticipantID)
# Returns the Grammatical flag from the partner's most recent plural
# (Number == 2) trial before this.trial.number, or NA if the partner hasn't
# produced a plural yet.
calc.partner.previous.grammaticality <- function(data,
                                                 this.trial.number,
                                                 this.pair, this.director) {
  partner.rows <- which(data$Chain == this.pair &
                          data$ParticipantID != this.director &
                          data$Number == 2 &
                          data$TrialNumberPair < this.trial.number)
  if (length(partner.rows) == 0) {
    # partner hasn't produced a plural yet
    return(NA)
  }
  data$Grammatical[partner.rows[length(partner.rows)]]
}
# Row-wise: whether the partner's last plural production was grammatical
# (ungrammatical primes are excluded below).
all.data.for.priming.analysis.recoded$PartnerLastGrammaticality <-
mapply(function(trialn,pair,director) calc.partner.previous.grammaticality(all.data.for.priming.analysis.recoded,trialn,pair,director),
all.data.for.priming.analysis.recoded$TrialNumberPair,
all.data.for.priming.analysis.recoded$Chain,
all.data.for.priming.analysis.recoded$ParticipantID)
Finally, for plotting etc want to code ParticleRecoded as NA/0/1 (NoParticle/Particle1/Particle2 respectively), and add a column with a binary coding for whether partner’s prime was for the same or a different class, same or a different noun.
# Binary coding of the participant's own particle: NA = no particle,
# 0 = Particle1, 1 = Particle2.
all.data.for.priming.analysis.recoded$ParticleRecodedBinary <-
ifelse(all.data.for.priming.analysis.recoded$ParticleRecoded=="NoParticle",NA,
ifelse(all.data.for.priming.analysis.recoded$ParticleRecoded=="Particle1",0,1))
# Was the partner's prime for the same category / same noun as the current
# referent? (NA where the partner hasn't primed yet.)
all.data.for.priming.analysis.recoded$PrimeOfSameCategory <- all.data.for.priming.analysis.recoded$Category==all.data.for.priming.analysis.recoded$PartnerLastParticleClass
all.data.for.priming.analysis.recoded$PrimeOfSameNoun <- all.data.for.priming.analysis.recoded$Noun==all.data.for.priming.analysis.recoded$PartnerLastParticleNoun
#get rid of dummy partner and occasional cases (in dyads only) where partner produced no particle on last plural.
# NB subset() also drops rows where PartnerLastParticle is NA (no prime yet).
all.data.for.priming.analysis.recoded <- subset(all.data.for.priming.analysis.recoded,ParticipantID!="dummyP" & PartnerLastParticle!='NoParticle')
#sort out factor levels and labels
all.data.for.priming.analysis.recoded$Condition <- factor(all.data.for.priming.analysis.recoded$Condition, levels=c("Single","PseudoDyad","Dyad"))
all.data.for.priming.analysis.recoded$NCategories <- plyr::revalue(factor(all.data.for.priming.analysis.recoded$NCategories),
c("1"="One Category",
"2"="Two Categories"))
all.data.for.priming.analysis.recoded$PrimeOfSameCategoryPretty <- plyr::revalue(factor(all.data.for.priming.analysis.recoded$PrimeOfSameCategory),
c("FALSE"="Different Category Prime",
"TRUE"="Same Category Prime"))
all.data.for.priming.analysis.recoded$PrimeOfSameNounPretty <- plyr::revalue(factor(all.data.for.priming.analysis.recoded$PrimeOfSameNoun),
c("FALSE"="Different Noun Prime",
"TRUE"="Same Noun Prime"))
Exclude trials where participant or partner’s prime was an illegal description. Counts suggest there are no such cases.
# Cross-tabulate own vs partner's-prime grammaticality to check how many
# trials the exclusion below will remove.
plyr::ddply(all.data.for.priming.analysis.recoded,~Grammatical+PartnerLastGrammaticality,plyr::summarise,Current_N=length(ParticipantID))
## Grammatical PartnerLastGrammaticality Current_N
## 1 FALSE TRUE 93
## 2 TRUE TRUE 8646
# Keep only trials where both the current production and the partner's prime
# were grammatical.
all.data.for.priming.analysis.recoded <- subset(all.data.for.priming.analysis.recoded,Grammatical & PartnerLastGrammaticality)
Summarize the data for plotting - by-participant proportions.
# By-participant proportion of Marker 2 use, split by the partner's last
# particle; prettier labels for plotting.
aggregated.priming.data <- aggregate(ParticleRecodedBinary~Chain+ParticipantID+Condition+NCategories+PartnerLastParticle,data=all.data.for.priming.analysis.recoded,FUN=mean)
aggregated.priming.data$PartnerLastParticle <- plyr::revalue(aggregated.priming.data$PartnerLastParticle,c("Particle1"="Marker 1","Particle2"="Marker 2"))
Plot.
# Priming plot: proportion of Marker 2 use as a function of partner's last
# production, faceted NCategories x Condition; saved for the paper.
ggplot(data=aggregated.priming.data) +
facet_grid(NCategories~Condition) +
stat_summary(aes(x=PartnerLastParticle, y=ParticleRecodedBinary),geom='point', fun='mean', colour='black',fill='black',size=3, shape=23) +
stat_summary(aes(x=PartnerLastParticle, y=ParticleRecodedBinary),geom='errorbar', fun.data='mean_cl_boot',fun.min="min", fun.max="max",width=0.2) +
geom_dotplot(aes(x=PartnerLastParticle, y=ParticleRecodedBinary, fill=Condition),binaxis='y',stackdir="center", binwidth = .025, binpositions='all', alpha=0.5) +
theme_bw() +
scale_fill_manual(values=my.colours) +
theme(legend.position = "none") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
xlab("Partner's last production") +
ylab("Plural marked using Marker 2")
ggsave("Figures/priming.pdf",width=8,height=6)
Stat - is choice of particle shaped by partner’s last production?
Coding as before - helmert for condition, everything else sum-coded.
# Main priming model: helmert coding for Condition, sum coding (sign-flipped
# so the labelled level codes as 1) for the 2-level predictors.
# NOTE(review): PrimeOfSameCategory/PrimeOfSameNoun are created above as
# logicals; contrasts<- normally requires a factor - confirm they are coerced
# to factors somewhere upstream, or that this version of R accepts logicals.
priming.data.for.analysis <- all.data.for.priming.analysis.recoded
priming.data.for.analysis$PartnerLastParticle <- factor(priming.data.for.analysis$PartnerLastParticle)
contrasts(priming.data.for.analysis$Condition) <- contr.helmert(3)
contrasts(priming.data.for.analysis$NCategories) <- -contr.sum(2) #NB - so that Two Categories coded as 1
contrasts(priming.data.for.analysis$PartnerLastParticle) <- -contr.sum(2) #NB - so that Particle2 coded as 1
contrasts(priming.data.for.analysis$PrimeOfSameCategory) <- -contr.sum(2) #NB - so that TRUE coded as 1
contrasts(priming.data.for.analysis$PrimeOfSameNoun) <- -contr.sum(2) #NB - so that TRUE coded as 1
#singular fit
#priming.model <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PartnerLastParticle + (1 + PartnerLastParticle | Chain/ParticipantID), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
#simplifying random effect structure for Participant while keeping nesting
priming.model <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PartnerLastParticle + (1 + PartnerLastParticle | Chain) + (1 | ParticipantID:Chain), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
Model shows effects of:
PartnerLastParticle (there is priming - if partner produced particle 2 you are more likely to do so)
condition x partner last particle (marginal): a larger priming effect in dyads
summary(priming.model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## ParticleRecodedBinary ~ Condition * NCategories * PartnerLastParticle +
## (1 + PartnerLastParticle | Chain) + (1 | ParticipantID:Chain)
## Data: priming.data.for.analysis
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 6702.2 6808.7 -3335.1 6670.2 5744
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5864 -0.8277 0.1456 0.7788 4.3123
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantID:Chain (Intercept) 0.64747 0.8047
## Chain (Intercept) 0.81870 0.9048
## PartnerLastParticle1 0.08374 0.2894 0.21
## Number of obs: 5760, groups: ParticipantID:Chain, 124; Chain, 92
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) 0.155342 0.127555 1.218
## Condition1 0.142586 0.162719 0.876
## Condition2 0.067514 0.086494 0.781
## NCategories1 -0.239902 0.127583 -1.880
## PartnerLastParticle1 0.352081 0.046835 7.517
## Condition1:NCategories1 -0.034238 0.162719 -0.210
## Condition2:NCategories1 -0.096031 0.086171 -1.114
## Condition1:PartnerLastParticle1 0.006612 0.056148 0.118
## Condition2:PartnerLastParticle1 0.066145 0.033507 1.974
## NCategories1:PartnerLastParticle1 0.031551 0.046758 0.675
## Condition1:NCategories1:PartnerLastParticle1 0.001005 0.056138 0.018
## Condition2:NCategories1:PartnerLastParticle1 -0.027747 0.033395 -0.831
## Pr(>|z|)
## (Intercept) 0.2233
## Condition1 0.3809
## Condition2 0.4351
## NCategories1 0.0601 .
## PartnerLastParticle1 5.58e-14 ***
## Condition1:NCategories1 0.8333
## Condition2:NCategories1 0.2651
## Condition1:PartnerLastParticle1 0.9063
## Condition2:PartnerLastParticle1 0.0484 *
## NCategories1:PartnerLastParticle1 0.4998
## Condition1:NCategories1:PartnerLastParticle1 0.9857
## Condition2:NCategories1:PartnerLastParticle1 0.4061
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cndtn1 Cndtn2 NCtgr1 PrtLP1 Cn1:NC1 Cn2:NC1 C1:PLP C2:PLP
## Condition1 0.086
## Condition2 -0.124 -0.063
## NCategoris1 -0.042 -0.005 -0.009
## PrtnrLstPr1 0.085 0.013 -0.034 0.014
## Cndtn1:NCt1 -0.005 -0.034 0.004 0.085 -0.006
## Cndtn2:NCt1 -0.007 0.004 -0.052 -0.126 0.037 -0.063
## Cndtn1:PLP1 0.012 0.105 -0.013 -0.004 0.084 -0.014 0.006
## Cndtn2:PLP1 -0.029 -0.009 0.066 0.038 0.052 0.003 0.036 -0.056
## NCtgr1:PLP1 0.016 -0.005 0.047 0.082 -0.104 0.013 -0.033 -0.006 -0.103
## C1:NC1:PLP1 -0.007 -0.014 0.002 0.015 -0.001 0.105 -0.009 -0.026 0.004
## C2:NC1:PLP1 0.039 0.004 0.046 -0.033 -0.106 -0.008 0.067 -0.002 -0.165
## NC1:PL C1:NC1:
## Condition1
## Condition2
## NCategoris1
## PrtnrLstPr1
## Cndtn1:NCt1
## Cndtn2:NCt1
## Cndtn1:PLP1
## Cndtn2:PLP1
## NCtgr1:PLP1
## C1:NC1:PLP1 0.079
## C2:NC1:PLP1 0.048 -0.061
Same analysis looking at the effect of within- vs across-category priming (for 2-category conditions only).
# By-participant Marker 2 proportions split additionally by same- vs
# different-category prime (two-category conditions only).
aggregated.priming.data.2category <- aggregate(ParticleRecodedBinary~Chain+ParticipantID+Condition+NCategories+PartnerLastParticle+PrimeOfSameCategoryPretty,data=subset(all.data.for.priming.analysis.recoded,NCategories=="Two Categories"),FUN=mean)
aggregated.priming.data.2category$PartnerLastParticle <- plyr::revalue(aggregated.priming.data.2category$PartnerLastParticle,c("Particle1"="Marker 1","Particle2"="Marker 2"))
Plot. NB flipping the layout of the grid to make it easier to compare within conditions.
# Category-boost plot: priming split by same- vs different-category prime
# (grid transposed relative to the main priming plot).
ggplot(data=aggregated.priming.data.2category) +
facet_grid(Condition~PrimeOfSameCategoryPretty) +
stat_summary(aes(x=PartnerLastParticle, y=ParticleRecodedBinary),geom='point', fun='mean', colour='black',fill='black',size=3, shape=23) +
stat_summary(aes(x=PartnerLastParticle, y=ParticleRecodedBinary),geom='errorbar', fun.data='mean_cl_boot',fun.min="min", fun.max="max",width=0.2) +
geom_dotplot(aes(x=PartnerLastParticle, y=ParticleRecodedBinary, fill=Condition),binaxis='y',stackdir="center", binwidth = .025, binpositions='all', alpha=0.5) +
theme_bw() +
scale_fill_manual(values=my.colours) +
theme(legend.position = "none") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
xlab("Partner's last production") +
ylab("Plural marked using Marker 2")
ggsave("Figures/priming_categoryboost.pdf",width=8,height=6)
Stat for 2-category conditions only, is there a difference for within- vs between-category priming?
# Category-boost model (two-category conditions only): does priming strength
# depend on whether the prime was same- vs different-category?
priming.model.2category <- glmer(ParticleRecodedBinary ~ Condition * PrimeOfSameCategory * PartnerLastParticle + (1 + PartnerLastParticle | Chain/ParticipantID), data=subset(priming.data.for.analysis,NCategories=="Two Categories"), family=binomial, control=glmerControl(optimizer="bobyqa"))
There is no PrimeOfSameCategory1:PartnerLastParticle1 interaction (which would suggest stronger priming within- than across-category).
summary(priming.model.2category)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## ParticleRecodedBinary ~ Condition * PrimeOfSameCategory * PartnerLastParticle +
## (1 + PartnerLastParticle | Chain/ParticipantID)
## Data: subset(priming.data.for.analysis, NCategories == "Two Categories")
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 3606.6 3714.2 -1785.3 3570.6 2898
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1999 -0.8137 -0.2542 0.8458 3.9343
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantID:Chain (Intercept) 0.87798 0.9370
## PartnerLastParticle1 0.02610 0.1616 0.55
## Chain (Intercept) 0.05328 0.2308
## PartnerLastParticle1 0.06226 0.2495 -1.00
## Number of obs: 2916, groups: ParticipantID:Chain, 63; Chain, 47
##
## Fixed effects:
## Estimate Std. Error
## (Intercept) -0.066436 0.137890
## Condition1 0.103236 0.183665
## Condition2 -0.009838 0.087961
## PrimeOfSameCategory1 -0.073662 0.043717
## PartnerLastParticle1 0.397065 0.062521
## Condition1:PrimeOfSameCategory1 -0.040057 0.057917
## Condition2:PrimeOfSameCategory1 0.028314 0.028136
## Condition1:PartnerLastParticle1 0.007632 0.079237
## Condition2:PartnerLastParticle1 0.057280 0.042444
## PrimeOfSameCategory1:PartnerLastParticle1 0.067696 0.043745
## Condition1:PrimeOfSameCategory1:PartnerLastParticle1 -0.076833 0.057971
## Condition2:PrimeOfSameCategory1:PartnerLastParticle1 -0.020316 0.028158
## z value Pr(>|z|)
## (Intercept) -0.482 0.630
## Condition1 0.562 0.574
## Condition2 -0.112 0.911
## PrimeOfSameCategory1 -1.685 0.092 .
## PartnerLastParticle1 6.351 2.14e-10 ***
## Condition1:PrimeOfSameCategory1 -0.692 0.489
## Condition2:PrimeOfSameCategory1 1.006 0.314
## Condition1:PartnerLastParticle1 0.096 0.923
## Condition2:PartnerLastParticle1 1.350 0.177
## PrimeOfSameCategory1:PartnerLastParticle1 1.548 0.122
## Condition1:PrimeOfSameCategory1:PartnerLastParticle1 -1.325 0.185
## Condition2:PrimeOfSameCategory1:PartnerLastParticle1 -0.722 0.471
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cndtn1 Cndtn2 PrOSC1 PrtLP1 Cn1:POSC1 Cn2:POSC1 C1:PLP
## Condition1 0.087
## Condition2 -0.288 -0.068
## PrmOfSmCtg1 0.004 0.004 0.004
## PrtnrLstPr1 0.023 0.005 -0.046 -0.008
## Cndt1:POSC1 0.003 0.000 -0.004 0.094 0.004
## Cndt2:POSC1 0.002 -0.003 0.005 -0.265 -0.005 -0.074
## Cndtn1:PLP1 0.006 0.048 -0.003 0.006 0.083 -0.004 -0.005
## Cndtn2:PLP1 -0.044 -0.003 -0.005 -0.006 -0.107 -0.004 -0.013 -0.065
## PrOSC1:PLP1 -0.003 0.003 -0.003 -0.049 0.006 -0.011 0.027 0.008
## C1:POSC1:PL 0.004 -0.001 -0.002 -0.011 0.008 -0.056 0.009 0.001
## C2:POSC1:PL -0.002 -0.003 -0.005 0.026 0.006 0.009 -0.037 -0.006
## C2:PLP POSC1: C1:POSC1:
## Condition1
## Condition2
## PrmOfSmCtg1
## PrtnrLstPr1
## Cndt1:POSC1
## Cndt2:POSC1
## Cndtn1:PLP1
## Cndtn2:PLP1
## PrOSC1:PLP1 0.007
## C1:POSC1:PL -0.005 0.092
## C2:POSC1:PL 0.011 -0.266 -0.072
Finally, same analysis looking at the effect of within- vs across-noun priming (the classic lexical boost).
# By-participant Marker 2 proportions split by same- vs different-noun prime
# (all conditions).
aggregated.priming.data.noun <- aggregate(ParticleRecodedBinary~Chain+ParticipantID+Condition+NCategories+PartnerLastParticle+PrimeOfSameNounPretty,data=subset(all.data.for.priming.analysis.recoded),FUN=mean)
aggregated.priming.data.noun$PartnerLastParticle <- plyr::revalue(aggregated.priming.data.noun$PartnerLastParticle,c("Particle1"="Marker 1","Particle2"="Marker 2"))
Plot (not in paper). NB flipping the layout of the grid to make it easier to compare within conditions. Plotting collapsing over 1 vs 2 category first…
Looks like there’s clearly a huge effect of same-noun primes (the effect of the partner particle choice is much bigger on same noun trials). This effect looks bigger in dyads, but the stats below do not support this. NB the number of trials in SameNounPrime is very small.
# Lexical-boost plot: priming split by same- vs different-noun prime.
ggplot(data=aggregated.priming.data.noun) +
facet_grid(Condition~PrimeOfSameNounPretty) +
stat_summary(aes(x=PartnerLastParticle, y=ParticleRecodedBinary),geom='point', fun='mean', colour='black',fill='black',size=3, shape=23) +
stat_summary(aes(x=PartnerLastParticle, y=ParticleRecodedBinary),geom='errorbar', fun.data='mean_cl_boot',fun.min="min", fun.max="max",width=0.2) +
geom_dotplot(aes(x=PartnerLastParticle, y=ParticleRecodedBinary, fill=Condition),binaxis='y',stackdir="center", binwidth = .025, binpositions='all', alpha=0.5) +
theme_bw() +
scale_fill_manual(values=my.colours) +
theme(legend.position = "none") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
xlab("Partner's last production") +
ylab("Plural marked using Marker 2")
ggsave("Figures/priming_lexicalboost.pdf",width=8,height=6)
Same plot for Dyads only, split by One vs Two Category (not included in paper).
# Same lexical-boost plot, dyads only, split One vs Two Category.
ggplot(data=subset(aggregated.priming.data.noun,Condition=="Dyad")) +
facet_grid(NCategories~PrimeOfSameNounPretty) +
stat_summary(aes(x=PartnerLastParticle, y=ParticleRecodedBinary),geom='point', fun='mean', colour='black',fill='black',size=3, shape=23) +
stat_summary(aes(x=PartnerLastParticle, y=ParticleRecodedBinary),geom='errorbar', fun.data='mean_cl_boot',fun.min="min", fun.max="max",width=0.2) +
geom_dotplot(aes(x=PartnerLastParticle, y=ParticleRecodedBinary, fill=Condition),binaxis='y',stackdir="center", binwidth = .025, binpositions='all', alpha=0.5) +
theme_bw() +
scale_fill_manual(values=my.colours) +
theme(legend.position = "none") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
xlab("Partner's last production") +
ylab("Plural marked using Marker 2")
Stat.
# Random-effect structure selection for the same-noun priming model.
# The maximal model (first commented line) gave a singular fit, so random
# slopes were removed step by step — starting from the smallest-variance
# term — until the fit was no longer degenerate. Each commented-out call
# below records one step of that simplification path.
#singular fit
#priming.model.noun <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PrimeOfSameNoun * PartnerLastParticle + (1 + PartnerLastParticle * PrimeOfSameNoun | Chain/ParticipantID), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
#no perfect correlations between random effects but variance on PartnerLastParticle1:PrimeOfSameNoun1 is smallest so taking that out
#still singular
#priming.model.noun <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PrimeOfSameNoun * PartnerLastParticle + (1 + PartnerLastParticle * PrimeOfSameNoun | Chain) + (1 + PartnerLastParticle + PrimeOfSameNoun |ParticipantID:Chain), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
#try removing interaction by chain too
#still singular
#priming.model.noun <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PrimeOfSameNoun * PartnerLastParticle + (1 + PartnerLastParticle + PrimeOfSameNoun | Chain) + (1 + PartnerLastParticle + PrimeOfSameNoun | ParticipantID:Chain), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
#simplify Chain random effect further
#still singular
#priming.model.noun <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PrimeOfSameNoun * PartnerLastParticle + (1 + PartnerLastParticle | Chain) + (1 + PartnerLastParticle + PrimeOfSameNoun | ParticipantID:Chain), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
#reduce Chain to intercept-only
#still singular
#priming.model.noun <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PrimeOfSameNoun * PartnerLastParticle + (1 | Chain) + (1 + PartnerLastParticle + PrimeOfSameNoun | ParticipantID:Chain), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
#simplify by-participant random effect further
# Final model (converges without singularity): by-chain random intercepts,
# plus by-participant intercepts and PartnerLastParticle slopes.
priming.model.noun <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PrimeOfSameNoun * PartnerLastParticle + (1 | Chain) + (1 + PartnerLastParticle | ParticipantID:Chain), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
This model should be treated with caution since the random effects are extremely simplified - but the lexical boost is extremely clear.
There is a clear lexical boost (as indicated by a significant PrimeOfSameNoun:PartnerLastParticle interaction).
These two marginal effects might also be potentially interesting: Condition2:NCategories1:PartnerLastParticle1 = -0.0928, p = .102, and Condition2:NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 = -0.0874, p = .099. (The values previously quoted here, -0.124 and -0.118 at p = .072, came from an earlier model run and no longer match the output below.)
The first would suggest less priming in the Two Category Dyads, the second would suggest a weaker lexical boost in the Two Category Dyads, both consistent with the reduced tendency of participants in this condition to converge on a fully regular system.
There are a couple of other effects that show up as significant but are not interpretable. This model also suggests stronger priming in the Dyad condition (as indicated by Condition2:PartnerLastParticle1), but note that here the coding is -1,1 for noun similarity, so weighs rare same noun trials quite heavily (i.e. this predictor is sum-coded, not centred). There are a couple of significant 3-way interactions not involving PartnerLastParticle, which just mean that the particle coded as Particle2 happens to be more frequent in those combinations of conditions - this cannot be other than by chance since the coding is random.
summary(priming.model.noun)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: ParticleRecodedBinary ~ Condition * NCategories * PrimeOfSameNoun *
## PartnerLastParticle + (1 | Chain) + (1 + PartnerLastParticle |
## ParticipantID:Chain)
## Data: priming.data.for.analysis
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 6703.1 6889.6 -3323.6 6647.1 5732
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5029 -0.8210 0.1504 0.7892 4.4195
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantID:Chain (Intercept) 0.66927 0.8181
## PartnerLastParticle1 0.08038 0.2835 0.13
## Chain (Intercept) 0.91279 0.9554
## Number of obs: 5760, groups: ParticipantID:Chain, 124; Chain, 92
##
## Fixed effects:
## Estimate
## (Intercept) 0.252180
## Condition1 0.180551
## Condition2 0.062846
## NCategories1 -0.340277
## PrimeOfSameNoun1 0.095626
## PartnerLastParticle1 0.608444
## Condition1:NCategories1 0.002112
## Condition2:NCategories1 -0.050866
## Condition1:PrimeOfSameNoun1 0.043056
## Condition2:PrimeOfSameNoun1 -0.016839
## NCategories1:PrimeOfSameNoun1 -0.104926
## Condition1:PartnerLastParticle1 0.041880
## Condition2:PartnerLastParticle1 0.102038
## NCategories1:PartnerLastParticle1 0.022636
## PrimeOfSameNoun1:PartnerLastParticle1 0.298248
## Condition1:NCategories1:PrimeOfSameNoun1 0.040434
## Condition2:NCategories1:PrimeOfSameNoun1 0.054609
## Condition1:NCategories1:PartnerLastParticle1 -0.103346
## Condition2:NCategories1:PartnerLastParticle1 -0.092763
## Condition1:PrimeOfSameNoun1:PartnerLastParticle1 0.039296
## Condition2:PrimeOfSameNoun1:PartnerLastParticle1 0.053342
## NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 -0.023676
## Condition1:NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 -0.118330
## Condition2:NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 -0.087409
## Std. Error
## (Intercept) 0.150435
## Condition1 0.191910
## Condition2 0.101671
## NCategories1 0.150521
## PrimeOfSameNoun1 0.078620
## PartnerLastParticle1 0.084206
## Condition1:NCategories1 0.191917
## Condition2:NCategories1 0.101822
## Condition1:PrimeOfSameNoun1 0.100512
## Condition2:PrimeOfSameNoun1 0.053025
## NCategories1:PrimeOfSameNoun1 0.078603
## Condition1:PartnerLastParticle1 0.107214
## Condition2:PartnerLastParticle1 0.056980
## NCategories1:PartnerLastParticle1 0.084046
## PrimeOfSameNoun1:PartnerLastParticle1 0.078630
## Condition1:NCategories1:PrimeOfSameNoun1 0.100551
## Condition2:NCategories1:PrimeOfSameNoun1 0.053022
## Condition1:NCategories1:PartnerLastParticle1 0.107185
## Condition2:NCategories1:PartnerLastParticle1 0.056798
## Condition1:PrimeOfSameNoun1:PartnerLastParticle1 0.100525
## Condition2:PrimeOfSameNoun1:PartnerLastParticle1 0.053038
## NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 0.078606
## Condition1:NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 0.100528
## Condition2:NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 0.053047
## z value Pr(>|z|)
## (Intercept) 1.676 0.093672
## Condition1 0.941 0.346802
## Condition2 0.618 0.536486
## NCategories1 -2.261 0.023781
## PrimeOfSameNoun1 1.216 0.223864
## PartnerLastParticle1 7.226 4.99e-13
## Condition1:NCategories1 0.011 0.991221
## Condition2:NCategories1 -0.500 0.617385
## Condition1:PrimeOfSameNoun1 0.428 0.668384
## Condition2:PrimeOfSameNoun1 -0.318 0.750817
## NCategories1:PrimeOfSameNoun1 -1.335 0.181913
## Condition1:PartnerLastParticle1 0.391 0.696074
## Condition2:PartnerLastParticle1 1.791 0.073332
## NCategories1:PartnerLastParticle1 0.269 0.787680
## PrimeOfSameNoun1:PartnerLastParticle1 3.793 0.000149
## Condition1:NCategories1:PrimeOfSameNoun1 0.402 0.687596
## Condition2:NCategories1:PrimeOfSameNoun1 1.030 0.303041
## Condition1:NCategories1:PartnerLastParticle1 -0.964 0.334955
## Condition2:NCategories1:PartnerLastParticle1 -1.633 0.102421
## Condition1:PrimeOfSameNoun1:PartnerLastParticle1 0.391 0.695861
## Condition2:PrimeOfSameNoun1:PartnerLastParticle1 1.006 0.314544
## NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 -0.301 0.763268
## Condition1:NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 -1.177 0.239162
## Condition2:NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 -1.648 0.099398
##
## (Intercept) .
## Condition1
## Condition2
## NCategories1 *
## PrimeOfSameNoun1
## PartnerLastParticle1 ***
## Condition1:NCategories1
## Condition2:NCategories1
## Condition1:PrimeOfSameNoun1
## Condition2:PrimeOfSameNoun1
## NCategories1:PrimeOfSameNoun1
## Condition1:PartnerLastParticle1
## Condition2:PartnerLastParticle1 .
## NCategories1:PartnerLastParticle1
## PrimeOfSameNoun1:PartnerLastParticle1 ***
## Condition1:NCategories1:PrimeOfSameNoun1
## Condition2:NCategories1:PrimeOfSameNoun1
## Condition1:NCategories1:PartnerLastParticle1
## Condition2:NCategories1:PartnerLastParticle1
## Condition1:PrimeOfSameNoun1:PartnerLastParticle1
## Condition2:PrimeOfSameNoun1:PartnerLastParticle1
## NCategories1:PrimeOfSameNoun1:PartnerLastParticle1
## Condition1:NCategories1:PrimeOfSameNoun1:PartnerLastParticle1
## Condition2:NCategories1:PrimeOfSameNoun1:PartnerLastParticle1 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 24 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
We also want to test the hypothesis that you should get more priming across similar nouns (which means more priming in general in the one-category conditions, since the nouns are all more similar?).
First, load embeddings - these are from http://vectors.nlpl.eu/repository/, I am using the BNC ones - and tidy up a bit.
# Load pre-trained word embeddings (word2vec text format, space-separated).
# The first line of the file is metadata (vocab size, dimensionality), so we
# skip it at read time instead of reading it in and dropping the row after.
all_embeddings <- read.csv("embeddings/0/model.txt", sep=' ', header=FALSE, skip=1)
colnames(all_embeddings)[1] <- "word" #rename first column to something more descriptive
# Pull out embeddings for the nouns used in this experiment. The model tags
# entries with their part of speech, hence the "_NOUN" suffix. Building the
# target list from the animals/vehicles vectors (rather than a hand-typed
# list) keeps it consistent with the category coding used below — assumes
# animals/vehicles contain exactly the experimental nouns (they are used that
# way for pair_type below).
embeddings <- subset(all_embeddings, word %in% paste0(c(animals, vehicles), "_NOUN"))
# Strip the POS tag so words match the Noun column in the experimental data.
embeddings$word <- stringr::str_replace(embeddings$word,"_NOUN",replacement = "")
Next, calculate all-pairs similarities. Add category info so we can verify that across-category pairings and within-category pairings are different.
# All ordered pairs of experimental nouns, with their cosine similarity.
all_pairs <- expand.grid(embeddings$word,embeddings$word)
colnames(all_pairs) <- c("w1","w2")
# Embedding vector lives in columns 2..ncol; derive this from the data rather
# than hard-coding 2:301, so a different-dimensionality model still works.
embedding_cols <- 2:ncol(embeddings)
all_pairs$similarity <- mapply(
  function(w1, w2) lsa::cosine(as.numeric(embeddings[embeddings$word==w1, embedding_cols]),
                               as.numeric(embeddings[embeddings$word==w2, embedding_cols])),
  all_pairs$w1, all_pairs$w2)
# Label whether both nouns come from the same semantic category. This is a
# plain vectorised computation — no need for mapply over rows.
all_pairs$pair_type <- ifelse((all_pairs$w1 %in% animals & all_pairs$w2 %in% animals) |
                                (all_pairs$w1 %in% vehicles & all_pairs$w2 %in% vehicles),
                              "SameCategory", "DifferentCategory")
Check that same-category similarities are higher! NB removing similarity=1 items, which are just items compared to themselves.
# Sanity check: within-category pairs should be more similar than
# across-category pairs. Self-pairs (similarity == 1) are excluded.
# fun.min/fun.max are ignored by stat_summary when fun.data is supplied, so
# those dead arguments have been removed; the error bars are mean_cl_boot CIs.
ggplot(data=subset(all_pairs, similarity<1),
       aes(x=pair_type, y=similarity, fill=pair_type, colour=pair_type)) +
  geom_dotplot(binaxis='y', stackdir="center", binwidth = .025, binpositions='all', alpha=0.5) +
  stat_summary(geom='point', fun='mean', colour='black', fill='black', size=3, shape=23) +
  stat_summary(geom='errorbar', fun.data='mean_cl_boot', width=0.2) +
  theme_bw()
Now add this similarity info to priming.data.for.analysis.
# Attach prime-to-target noun similarity to the trial-level data. A single
# vectorised match() on a composite "w1 w2" key replaces the previous per-row
# mapply+subset lookup, which was O(rows * pairs). Assumes every
# (Noun, PartnerLastParticleNoun) pair appears in all_pairs (as it must,
# since all_pairs covers all ordered pairs of experimental nouns); an
# unmatched pair would now yield NA rather than an error.
pair_key <- paste(all_pairs$w1, all_pairs$w2)
priming.data.for.analysis$embedding_similarity <-
  all_pairs$similarity[match(paste(priming.data.for.analysis$Noun,
                                   priming.data.for.analysis$PartnerLastParticleNoun),
                             pair_key)]
Verify that average prime-to-probe noun similarity is higher in One Category than Two Category conditions.
Means.
mean(subset(priming.data.for.analysis,NCategories=="One Category")$embedding_similarity)
## [1] 0.4222788
sd(subset(priming.data.for.analysis,NCategories=="One Category")$embedding_similarity)
## [1] 0.1768293
mean(subset(priming.data.for.analysis,NCategories=="Two Categories")$embedding_similarity)
## [1] 0.323214
sd(subset(priming.data.for.analysis,NCategories=="Two Categories")$embedding_similarity)
## [1] 0.1987015
Plot.
# Trial-level prime-to-target similarity by condition: jittered raw points
# with the condition mean overlaid as a black diamond.
ggplot(priming.data.for.analysis,
       aes(x=NCategories, y=embedding_similarity, fill=NCategories, colour=NCategories)) +
  geom_jitter(alpha=0.5, height=0) +
  stat_summary(geom='point', fun='mean', colour='black', fill='black', size=3, shape=23) +
  theme_bw() +
  scale_fill_manual(values=my.colours) +
  scale_colour_manual(values=my.colours) +
  theme(legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title.x = element_blank()) +
  ylab("Embedding similarity")
Possibly a more informative measure - what is the average item-to-item similarity in One vs Two Category?
# Average pairwise embedding similarity among the nouns each participant saw
# in training. Rewritten to (a) collect one-row data frames in a list and
# bind once at the end, instead of growing noun.set.data with rbind() inside
# the loop, and (b) look up all pair similarities for a participant with a
# single vectorised match(), instead of an O(pairs) subset per pair inside a
# double loop. The result is identical: similarity is symmetric, so the mean
# over ordered pairs equals the mean over unordered pairs (and, as before, a
# participant with fewer than two training nouns would yield NaN).
pair_key <- paste(all_pairs$w1, all_pairs$w2)
noun.set.rows <- list()
for (p in unique(all.data.for.proportion.analysis$ParticipantID)) {
  this.participant <- subset(all.data.for.proportion.analysis, ParticipantID == p)
  this.participant.condition <- unique(this.participant$NCategories)
  nouns <- unique(subset(this.participant, Stage=='training')$Noun)
  # all ordered pairs of distinct training nouns
  noun.pairs <- expand.grid(w1 = nouns, w2 = nouns, stringsAsFactors = FALSE)
  noun.pairs <- subset(noun.pairs, w1 != w2)
  sims <- all_pairs$similarity[match(paste(noun.pairs$w1, noun.pairs$w2), pair_key)]
  noun.set.rows[[length(noun.set.rows) + 1]] <- data.frame(
    ParticipantID = p,
    NCategories = this.participant.condition,
    AverageSimilarity = mean(sims))
}
noun.set.data <- do.call(rbind, noun.set.rows)
Means.
mean(subset(noun.set.data,NCategories==1)$AverageSimilarity)
## [1] 0.3870913
sd(subset(noun.set.data,NCategories==1)$AverageSimilarity)
## [1] 0.02487501
mean(subset(noun.set.data,NCategories==2)$AverageSimilarity)
## [1] 0.2871402
sd(subset(noun.set.data,NCategories==2)$AverageSimilarity)
## [1] 0.01166254
Plot. NB there are only 2 possible values for the One Category condition because we use all the nouns from a given category - all the animals, or all the vehicles.
# Plot average item-to-item similarity per participant by condition.
# NCategories is converted to a factor so ggplot treats it as discrete.
noun.set.data$NCategories <- factor(noun.set.data$NCategories)
ggplot(noun.set.data,
       aes(x=NCategories, y=AverageSimilarity, fill=NCategories, colour=NCategories)) +
  geom_jitter(alpha=0.5, height=0) +
  stat_summary(geom='point', fun='mean', colour='black', fill='black', size=3, shape=23) +
  theme_bw() +
  scale_fill_manual(values=my.colours) +
  scale_colour_manual(values=my.colours) +
  theme(legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title.x = element_blank()) +
  ylab("Average embedding similarity")
Plot average embedding_similarity based on whether participant and partner used the same particle (embedding similarity should be higher if they did?). The raw visualisation is hard to read, so each trial is recoded as Same or Different particle relative to the partner's last production.
priming.data.for.analysis$SameParticle <- ifelse(priming.data.for.analysis$ParticleRecoded==priming.data.for.analysis$PartnerLastParticle,"SameParticle","DifferentParticle")
Looks like SameParticle (i.e. primed) trials have higher similarity, which makes sense.
# Mean embedding similarity on primed (SameParticle) vs unprimed trials,
# faceted by Condition and number of categories.
# fun.min/fun.max are ignored by stat_summary when fun.data is supplied, so
# those dead arguments have been removed (error bars are mean_cl_boot CIs);
# the commented-out scratch layers left over from the plot above have also
# been dropped.
ggplot(data=subset(priming.data.for.analysis, !is.na(ParticleRecodedBinary)),
       aes(y=embedding_similarity, x=SameParticle)) +
  facet_grid(Condition~NCategories) +
  stat_summary(geom='point', fun='mean', colour='black', fill='black', size=3, shape=23) +
  stat_summary(geom='errorbar', fun.data='mean_cl_boot', width=0.2) +
  theme_bw()
Same plot excluding embedding_similarity=1 trials. Looks like it’s mostly driven by the similarity=1 items in fact!
# Same plot but excluding identical-noun trials (embedding_similarity == 1).
# fun.min/fun.max removed: they are ignored when fun.data is supplied.
ggplot(data=subset(priming.data.for.analysis,
                   !is.na(ParticleRecodedBinary) & embedding_similarity<1),
       aes(y=embedding_similarity, x=SameParticle)) +
  facet_grid(Condition~NCategories) +
  stat_summary(geom='point', fun='mean', colour='black', fill='black', size=3, shape=23) +
  stat_summary(geom='errorbar', fun.data='mean_cl_boot', width=0.2) +
  theme_bw()
Stat. Just include embedding similarity instead of same vs different noun prime. Both of the next two models are singular, so the random-effect structure is simplified step by step, down to the structure that converged for the same-noun model above.
# Random-effect structure selection for the embedding-similarity priming
# model, following the same simplification path as the same-noun model:
# each commented-out call below was singular, so slopes were removed until
# reaching the structure that converged above.
#singular
#priming.model.embedding <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PartnerLastParticle * scale(embedding_similarity)+ (1 + PartnerLastParticle * scale(embedding_similarity) | Chain/ParticipantID), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
#singular
#priming.model.embedding <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PartnerLastParticle * scale(embedding_similarity)+ (1 + PartnerLastParticle * scale(embedding_similarity) | Chain) + (1 + PartnerLastParticle + scale(embedding_similarity) | ParticipantID:Chain), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
#singular
#priming.model.embedding <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PartnerLastParticle * scale(embedding_similarity)+ (1 + PartnerLastParticle * scale(embedding_similarity) | Chain) + (1 + PartnerLastParticle | ParticipantID:Chain), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
#boiling right down to the model that converged above
# Final model: by-chain random intercepts plus by-participant intercepts and
# PartnerLastParticle slopes (same structure as priming.model.noun).
priming.model.embedding <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PartnerLastParticle * scale(embedding_similarity) + (1 | Chain) + (1 + PartnerLastParticle | ParticipantID:Chain), data=priming.data.for.analysis, family=binomial, control=glmerControl(optimizer="bobyqa"))
There is a clear similarity boost: PartnerLastParticle1:scale(embedding_similarity) is significant and positive, i.e. greater similarity between prime and target nouns leads to more priming.
summary(priming.model.embedding)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## ParticleRecodedBinary ~ Condition * NCategories * PartnerLastParticle *
## scale(embedding_similarity) + (1 | Chain) + (1 + PartnerLastParticle |
## ParticipantID:Chain)
## Data: priming.data.for.analysis
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 6703.5 6890.0 -3323.8 6647.5 5732
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5652 -0.8209 0.1497 0.7909 4.4543
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantID:Chain (Intercept) 0.67119 0.8193
## PartnerLastParticle1 0.07863 0.2804 0.12
## Chain (Intercept) 0.92289 0.9607
## Number of obs: 5760, groups: ParticipantID:Chain, 124; Chain, 92
##
## Fixed effects:
## Estimate
## (Intercept) 0.155250
## Condition1 0.145960
## Condition2 0.083472
## NCategories1 -0.236030
## PartnerLastParticle1 0.335956
## scale(embedding_similarity) 0.045687
## Condition1:NCategories1 -0.030107
## Condition2:NCategories1 -0.097020
## Condition1:PartnerLastParticle1 -0.009301
## Condition2:PartnerLastParticle1 0.046263
## NCategories1:PartnerLastParticle1 0.074874
## Condition1:scale(embedding_similarity) 0.026682
## Condition2:scale(embedding_similarity) 0.021335
## NCategories1:scale(embedding_similarity) -0.057955
## PartnerLastParticle1:scale(embedding_similarity) 0.121753
## Condition1:NCategories1:PartnerLastParticle1 0.018124
## Condition2:NCategories1:PartnerLastParticle1 -0.007659
## Condition1:NCategories1:scale(embedding_similarity) 0.016648
## Condition2:NCategories1:scale(embedding_similarity) 0.016419
## Condition1:PartnerLastParticle1:scale(embedding_similarity) 0.059448
## Condition2:PartnerLastParticle1:scale(embedding_similarity) 0.025907
## NCategories1:PartnerLastParticle1:scale(embedding_similarity) -0.012642
## Condition1:NCategories1:PartnerLastParticle1:scale(embedding_similarity) -0.075215
## Condition2:NCategories1:PartnerLastParticle1:scale(embedding_similarity) -0.032126
## Std. Error
## (Intercept) 0.132838
## Condition1 0.169605
## Condition2 0.089709
## NCategories1 0.133023
## PartnerLastParticle1 0.044576
## scale(embedding_similarity) 0.035213
## Condition1:NCategories1 0.169604
## Condition2:NCategories1 0.089915
## Condition1:PartnerLastParticle1 0.056536
## Condition2:PartnerLastParticle1 0.030159
## NCategories1:PartnerLastParticle1 0.044360
## Condition1:scale(embedding_similarity) 0.045698
## Condition2:scale(embedding_similarity) 0.023303
## NCategories1:scale(embedding_similarity) 0.035199
## PartnerLastParticle1:scale(embedding_similarity) 0.035181
## Condition1:NCategories1:PartnerLastParticle1 0.056528
## Condition2:NCategories1:PartnerLastParticle1 0.029853
## Condition1:NCategories1:scale(embedding_similarity) 0.045710
## Condition2:NCategories1:scale(embedding_similarity) 0.023302
## Condition1:PartnerLastParticle1:scale(embedding_similarity) 0.045665
## Condition2:PartnerLastParticle1:scale(embedding_similarity) 0.023278
## NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.035167
## Condition1:NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.045671
## Condition2:NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.023276
## z value
## (Intercept) 1.169
## Condition1 0.861
## Condition2 0.930
## NCategories1 -1.774
## PartnerLastParticle1 7.537
## scale(embedding_similarity) 1.297
## Condition1:NCategories1 -0.178
## Condition2:NCategories1 -1.079
## Condition1:PartnerLastParticle1 -0.165
## Condition2:PartnerLastParticle1 1.534
## NCategories1:PartnerLastParticle1 1.688
## Condition1:scale(embedding_similarity) 0.584
## Condition2:scale(embedding_similarity) 0.916
## NCategories1:scale(embedding_similarity) -1.647
## PartnerLastParticle1:scale(embedding_similarity) 3.461
## Condition1:NCategories1:PartnerLastParticle1 0.321
## Condition2:NCategories1:PartnerLastParticle1 -0.257
## Condition1:NCategories1:scale(embedding_similarity) 0.364
## Condition2:NCategories1:scale(embedding_similarity) 0.705
## Condition1:PartnerLastParticle1:scale(embedding_similarity) 1.302
## Condition2:PartnerLastParticle1:scale(embedding_similarity) 1.113
## NCategories1:PartnerLastParticle1:scale(embedding_similarity) -0.359
## Condition1:NCategories1:PartnerLastParticle1:scale(embedding_similarity) -1.647
## Condition2:NCategories1:PartnerLastParticle1:scale(embedding_similarity) -1.380
## Pr(>|z|)
## (Intercept) 0.242519
## Condition1 0.389464
## Condition2 0.352122
## NCategories1 0.076005
## PartnerLastParticle1 4.82e-14
## scale(embedding_similarity) 0.194476
## Condition1:NCategories1 0.859106
## Condition2:NCategories1 0.280580
## Condition1:PartnerLastParticle1 0.869330
## Condition2:PartnerLastParticle1 0.125042
## NCategories1:PartnerLastParticle1 0.091433
## Condition1:scale(embedding_similarity) 0.559306
## Condition2:scale(embedding_similarity) 0.359905
## NCategories1:scale(embedding_similarity) 0.099661
## PartnerLastParticle1:scale(embedding_similarity) 0.000539
## Condition1:NCategories1:PartnerLastParticle1 0.748492
## Condition2:NCategories1:PartnerLastParticle1 0.797520
## Condition1:NCategories1:scale(embedding_similarity) 0.715697
## Condition2:NCategories1:scale(embedding_similarity) 0.481060
## Condition1:PartnerLastParticle1:scale(embedding_similarity) 0.192971
## Condition2:PartnerLastParticle1:scale(embedding_similarity) 0.265738
## NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.719222
## Condition1:NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.099583
## Condition2:NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.167524
##
## (Intercept)
## Condition1
## Condition2
## NCategories1 .
## PartnerLastParticle1 ***
## scale(embedding_similarity)
## Condition1:NCategories1
## Condition2:NCategories1
## Condition1:PartnerLastParticle1
## Condition2:PartnerLastParticle1
## NCategories1:PartnerLastParticle1 .
## Condition1:scale(embedding_similarity)
## Condition2:scale(embedding_similarity)
## NCategories1:scale(embedding_similarity) .
## PartnerLastParticle1:scale(embedding_similarity) ***
## Condition1:NCategories1:PartnerLastParticle1
## Condition2:NCategories1:PartnerLastParticle1
## Condition1:NCategories1:scale(embedding_similarity)
## Condition2:NCategories1:scale(embedding_similarity)
## Condition1:PartnerLastParticle1:scale(embedding_similarity)
## Condition2:PartnerLastParticle1:scale(embedding_similarity)
## NCategories1:PartnerLastParticle1:scale(embedding_similarity)
## Condition1:NCategories1:PartnerLastParticle1:scale(embedding_similarity) .
## Condition2:NCategories1:PartnerLastParticle1:scale(embedding_similarity)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 24 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
Try the same analysis excluding similarity=1 items.
priming.model.embedding2 <- glmer(ParticleRecodedBinary ~ Condition * NCategories * PartnerLastParticle * scale(embedding_similarity)+ (1 | Chain) + (1 + PartnerLastParticle | ParticipantID:Chain), data=subset(priming.data.for.analysis,embedding_similarity<1), family=binomial, control=glmerControl(optimizer="bobyqa"))
Now the PartnerLastParticle1:scale(embedding_similarity) similarity effect goes away.
summary(priming.model.embedding2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## ParticleRecodedBinary ~ Condition * NCategories * PartnerLastParticle *
## scale(embedding_similarity) + (1 | Chain) + (1 + PartnerLastParticle |
## ParticipantID:Chain)
## Data: subset(priming.data.for.analysis, embedding_similarity < 1)
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 6393.3 6578.2 -3168.7 6337.3 5425
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4993 -0.8281 0.1503 0.7975 4.4088
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ParticipantID:Chain (Intercept) 0.68479 0.8275
## PartnerLastParticle1 0.07341 0.2709 0.15
## Chain (Intercept) 0.94002 0.9695
## Number of obs: 5453, groups: ParticipantID:Chain, 124; Chain, 92
##
## Fixed effects:
## Estimate
## (Intercept) 0.1458100
## Condition1 0.1450857
## Condition2 0.0796467
## NCategories1 -0.2385022
## PartnerLastParticle1 0.3044808
## scale(embedding_similarity) 0.0080554
## Condition1:NCategories1 -0.0338889
## Condition2:NCategories1 -0.0928288
## Condition1:PartnerLastParticle1 -0.0138782
## Condition2:PartnerLastParticle1 0.0467926
## NCategories1:PartnerLastParticle1 0.0630090
## Condition1:scale(embedding_similarity) 0.0132600
## Condition2:scale(embedding_similarity) 0.0421749
## NCategories1:scale(embedding_similarity) -0.0316601
## PartnerLastParticle1:scale(embedding_similarity) 0.0300853
## Condition1:NCategories1:PartnerLastParticle1 0.0405939
## Condition2:NCategories1:PartnerLastParticle1 0.0013881
## Condition1:NCategories1:scale(embedding_similarity) 0.0167763
## Condition2:NCategories1:scale(embedding_similarity) 0.0006372
## Condition1:PartnerLastParticle1:scale(embedding_similarity) 0.0680031
## Condition2:PartnerLastParticle1:scale(embedding_similarity) 0.0107920
## NCategories1:PartnerLastParticle1:scale(embedding_similarity) -0.0033778
## Condition1:NCategories1:PartnerLastParticle1:scale(embedding_similarity) -0.0400571
## Condition2:NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.0014335
## Std. Error
## (Intercept) 0.1347288
## Condition1 0.1721225
## Condition2 0.0909406
## NCategories1 0.1349357
## PartnerLastParticle1 0.0461363
## scale(embedding_similarity) 0.0370048
## Condition1:NCategories1 0.1721300
## Condition2:NCategories1 0.0911451
## Condition1:PartnerLastParticle1 0.0586930
## Condition2:PartnerLastParticle1 0.0311819
## NCategories1:PartnerLastParticle1 0.0460135
## Condition1:scale(embedding_similarity) 0.0485666
## Condition2:scale(embedding_similarity) 0.0241621
## NCategories1:scale(embedding_similarity) 0.0370146
## PartnerLastParticle1:scale(embedding_similarity) 0.0367791
## Condition1:NCategories1:PartnerLastParticle1 0.0587013
## Condition2:NCategories1:PartnerLastParticle1 0.0309028
## Condition1:NCategories1:scale(embedding_similarity) 0.0485443
## Condition2:NCategories1:scale(embedding_similarity) 0.0241555
## Condition1:PartnerLastParticle1:scale(embedding_similarity) 0.0482620
## Condition2:PartnerLastParticle1:scale(embedding_similarity) 0.0240085
## NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.0367762
## Condition1:NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.0482564
## Condition2:NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.0240138
## z value
## (Intercept) 1.082
## Condition1 0.843
## Condition2 0.876
## NCategories1 -1.768
## PartnerLastParticle1 6.600
## scale(embedding_similarity) 0.218
## Condition1:NCategories1 -0.197
## Condition2:NCategories1 -1.018
## Condition1:PartnerLastParticle1 -0.236
## Condition2:PartnerLastParticle1 1.501
## NCategories1:PartnerLastParticle1 1.369
## Condition1:scale(embedding_similarity) 0.273
## Condition2:scale(embedding_similarity) 1.745
## NCategories1:scale(embedding_similarity) -0.855
## PartnerLastParticle1:scale(embedding_similarity) 0.818
## Condition1:NCategories1:PartnerLastParticle1 0.692
## Condition2:NCategories1:PartnerLastParticle1 0.045
## Condition1:NCategories1:scale(embedding_similarity) 0.346
## Condition2:NCategories1:scale(embedding_similarity) 0.026
## Condition1:PartnerLastParticle1:scale(embedding_similarity) 1.409
## Condition2:PartnerLastParticle1:scale(embedding_similarity) 0.450
## NCategories1:PartnerLastParticle1:scale(embedding_similarity) -0.092
## Condition1:NCategories1:PartnerLastParticle1:scale(embedding_similarity) -0.830
## Condition2:NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.060
## Pr(>|z|)
## (Intercept) 0.2791
## Condition1 0.3993
## Condition2 0.3811
## NCategories1 0.0771
## PartnerLastParticle1 4.12e-11
## scale(embedding_similarity) 0.8277
## Condition1:NCategories1 0.8439
## Condition2:NCategories1 0.3085
## Condition1:PartnerLastParticle1 0.8131
## Condition2:PartnerLastParticle1 0.1335
## NCategories1:PartnerLastParticle1 0.1709
## Condition1:scale(embedding_similarity) 0.7848
## Condition2:scale(embedding_similarity) 0.0809
## NCategories1:scale(embedding_similarity) 0.3924
## PartnerLastParticle1:scale(embedding_similarity) 0.4134
## Condition1:NCategories1:PartnerLastParticle1 0.4892
## Condition2:NCategories1:PartnerLastParticle1 0.9642
## Condition1:NCategories1:scale(embedding_similarity) 0.7297
## Condition2:NCategories1:scale(embedding_similarity) 0.9790
## Condition1:PartnerLastParticle1:scale(embedding_similarity) 0.1588
## Condition2:PartnerLastParticle1:scale(embedding_similarity) 0.6531
## NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.9268
## Condition1:NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.4065
## Condition2:NCategories1:PartnerLastParticle1:scale(embedding_similarity) 0.9524
##
## (Intercept)
## Condition1
## Condition2
## NCategories1 .
## PartnerLastParticle1 ***
## scale(embedding_similarity)
## Condition1:NCategories1
## Condition2:NCategories1
## Condition1:PartnerLastParticle1
## Condition2:PartnerLastParticle1
## NCategories1:PartnerLastParticle1
## Condition1:scale(embedding_similarity)
## Condition2:scale(embedding_similarity) .
## NCategories1:scale(embedding_similarity)
## PartnerLastParticle1:scale(embedding_similarity)
## Condition1:NCategories1:PartnerLastParticle1
## Condition2:NCategories1:PartnerLastParticle1
## Condition1:NCategories1:scale(embedding_similarity)
## Condition2:NCategories1:scale(embedding_similarity)
## Condition1:PartnerLastParticle1:scale(embedding_similarity)
## Condition2:PartnerLastParticle1:scale(embedding_similarity)
## NCategories1:PartnerLastParticle1:scale(embedding_similarity)
## Condition1:NCategories1:PartnerLastParticle1:scale(embedding_similarity)
## Condition2:NCategories1:PartnerLastParticle1:scale(embedding_similarity)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 24 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it